From cd749a9c07f1de2fb8affde90537efa4bc3e7c54 Mon Sep 17 00:00:00 2001
From: rdivacky <rdivacky@FreeBSD.org>
Date: Wed, 14 Oct 2009 17:57:32 +0000
Subject: Update llvm to r84119.

---
 lib/Transforms/Hello/Hello.cpp                     |    6 +-
 lib/Transforms/IPO/ArgumentPromotion.cpp           |  122 +-
 lib/Transforms/IPO/CMakeLists.txt                  |    7 +-
 lib/Transforms/IPO/ConstantMerge.cpp               |    2 +-
 lib/Transforms/IPO/DeadArgumentElimination.cpp     |   62 +-
 lib/Transforms/IPO/ExtractGV.cpp                   |    9 +-
 lib/Transforms/IPO/FunctionAttrs.cpp               |   38 +-
 lib/Transforms/IPO/GlobalDCE.cpp                   |    4 +
 lib/Transforms/IPO/GlobalOpt.cpp                   |  838 +++-
 lib/Transforms/IPO/IPConstantPropagation.cpp       |   10 +-
 lib/Transforms/IPO/IndMemRemoval.cpp               |   19 +-
 lib/Transforms/IPO/InlineAlways.cpp                |    2 +-
 lib/Transforms/IPO/InlineSimple.cpp                |    4 +-
 lib/Transforms/IPO/Inliner.cpp                     |  441 ++-
 lib/Transforms/IPO/Internalize.cpp                 |   13 +-
 lib/Transforms/IPO/LoopExtractor.cpp               |  108 +-
 lib/Transforms/IPO/LowerSetJmp.cpp                 |   62 +-
 lib/Transforms/IPO/MergeFunctions.cpp              |   45 +-
 lib/Transforms/IPO/PartialInlining.cpp             |    3 +-
 lib/Transforms/IPO/PruneEH.cpp                     |   17 +-
 lib/Transforms/IPO/RaiseAllocations.cpp            |   45 +-
 lib/Transforms/IPO/StripSymbols.cpp                |  180 +-
 lib/Transforms/IPO/StructRetPromotion.cpp          |  105 +-
 lib/Transforms/Instrumentation/BlockProfiling.cpp  |   31 +-
 lib/Transforms/Instrumentation/CMakeLists.txt      |    1 +
 lib/Transforms/Instrumentation/EdgeProfiling.cpp   |   36 +-
 .../Instrumentation/MaximumSpanningTree.h          |   95 +
 .../Instrumentation/OptimalEdgeProfiling.cpp       |  219 ++
 lib/Transforms/Instrumentation/ProfilingUtils.cpp  |   39 +-
 lib/Transforms/Instrumentation/RSProfiling.cpp     |   68 +-
 lib/Transforms/Makefile                            |    2 +-
 lib/Transforms/Scalar/ADCE.cpp                     |    4 +-
 lib/Transforms/Scalar/BasicBlockPlacement.cpp      |    7 +-
 lib/Transforms/Scalar/CMakeLists.txt               |    3 +-
 lib/Transforms/Scalar/CodeGenLICM.cpp              |  112 +
 lib/Transforms/Scalar/CodeGenPrepare.cpp           |   84 +-
 lib/Transforms/Scalar/CondPropagate.cpp            |   12 +-
 lib/Transforms/Scalar/ConstantProp.cpp             |    5 +-
 lib/Transforms/Scalar/DCE.cpp                      |    3 +-
 lib/Transforms/Scalar/DeadStoreElimination.cpp     |   67 +-
 lib/Transforms/Scalar/GVN.cpp                      | 1386 ++++---
 lib/Transforms/Scalar/IndVarSimplify.cpp           |  137 +-
 lib/Transforms/Scalar/InstructionCombining.cpp     | 4048 ++++++++++----------
 lib/Transforms/Scalar/JumpThreading.cpp            |  550 ++-
 lib/Transforms/Scalar/LICM.cpp                     |   51 +-
 lib/Transforms/Scalar/LoopDeletion.cpp             |   70 +-
 lib/Transforms/Scalar/LoopIndexSplit.cpp           |   38 +-
 lib/Transforms/Scalar/LoopRotation.cpp             |   72 +-
 lib/Transforms/Scalar/LoopStrengthReduce.cpp       |  349 +-
 lib/Transforms/Scalar/LoopUnroll.cpp               |   58 +-
 lib/Transforms/Scalar/LoopUnswitch.cpp             |  227 +-
 lib/Transforms/Scalar/MemCpyOptimizer.cpp          |  230 +-
 lib/Transforms/Scalar/Reassociate.cpp              |   83 +-
 lib/Transforms/Scalar/Reg2Mem.cpp                  |  133 +-
 lib/Transforms/Scalar/SCCP.cpp                     |  148 +-
 lib/Transforms/Scalar/ScalarReplAggregates.cpp     |  199 +-
 lib/Transforms/Scalar/SimplifyCFGPass.cpp          |   13 +-
 lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp |   15 +-
 lib/Transforms/Scalar/SimplifyLibCalls.cpp         | 1044 ++---
 lib/Transforms/Scalar/TailDuplication.cpp          |   20 +-
 lib/Transforms/Scalar/TailRecursionElimination.cpp |    5 +-
 lib/Transforms/Utils/AddrModeMatcher.cpp           |   15 +-
 lib/Transforms/Utils/BasicBlockUtils.cpp           |  118 +-
 lib/Transforms/Utils/BasicInliner.cpp              |   20 +-
 lib/Transforms/Utils/BreakCriticalEdges.cpp        |  118 +-
 lib/Transforms/Utils/CMakeLists.txt                |    7 +-
 lib/Transforms/Utils/CloneFunction.cpp             |   33 +-
 lib/Transforms/Utils/CloneModule.cpp               |   12 +-
 lib/Transforms/Utils/CodeExtractor.cpp             |  139 +-
 lib/Transforms/Utils/DemoteRegToStack.cpp          |    6 +-
 lib/Transforms/Utils/InlineFunction.cpp            |  314 +-
 lib/Transforms/Utils/InstructionNamer.cpp          |    4 +-
 lib/Transforms/Utils/LCSSA.cpp                     |  305 +-
 lib/Transforms/Utils/Local.cpp                     |   16 +-
 lib/Transforms/Utils/LoopSimplify.cpp              |  218 +-
 lib/Transforms/Utils/LowerAllocations.cpp          |   73 +-
 lib/Transforms/Utils/LowerInvoke.cpp               |  107 +-
 lib/Transforms/Utils/LowerSwitch.cpp               |   48 +-
 lib/Transforms/Utils/Mem2Reg.cpp                   |    2 +-
 lib/Transforms/Utils/PromoteMemoryToRegister.cpp   |   54 +-
 lib/Transforms/Utils/SSAUpdater.cpp                |  335 ++
 lib/Transforms/Utils/SSI.cpp                       |  332 +-
 lib/Transforms/Utils/SimplifyCFG.cpp               |  241 +-
 lib/Transforms/Utils/UnifyFunctionExitNodes.cpp    |   18 +-
 lib/Transforms/Utils/UnrollLoop.cpp                |   27 +-
 lib/Transforms/Utils/ValueMapper.cpp               |   54 +-
 86 files changed, 8170 insertions(+), 6252 deletions(-)
 create mode 100644 lib/Transforms/Instrumentation/MaximumSpanningTree.h
 create mode 100644 lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
 create mode 100644 lib/Transforms/Scalar/CodeGenLICM.cpp
 create mode 100644 lib/Transforms/Utils/SSAUpdater.cpp

(limited to 'lib/Transforms')

diff --git a/lib/Transforms/Hello/Hello.cpp b/lib/Transforms/Hello/Hello.cpp
index d07f613..8000d0d 100644
--- a/lib/Transforms/Hello/Hello.cpp
+++ b/lib/Transforms/Hello/Hello.cpp
@@ -16,7 +16,7 @@
 #include "llvm/Pass.h"
 #include "llvm/Function.h"
 #include "llvm/ADT/StringExtras.h"
-#include "llvm/Support/Streams.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/Statistic.h"
 using namespace llvm;
 
@@ -32,7 +32,7 @@ namespace {
       HelloCounter++;
       std::string fname = F.getName();
       EscapeString(fname);
-      cerr << "Hello: " << fname << "\n";
+      errs() << "Hello: " << fname << "\n";
       return false;
     }
   };
@@ -51,7 +51,7 @@ namespace {
       HelloCounter++;
       std::string fname = F.getName();
       EscapeString(fname);
-      cerr << "Hello: " << fname << "\n";
+      errs() << "Hello: " << fname << "\n";
       return false;
     }
 
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index a612634..5b91f3d 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -36,16 +36,18 @@
 #include "llvm/Module.h"
 #include "llvm/CallGraphSCCPass.h"
 #include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Support/CallSite.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringExtras.h"
-#include "llvm/Support/Compiler.h"
 #include <set>
 using namespace llvm;
 
@@ -60,11 +62,10 @@ namespace {
   struct VISIBILITY_HIDDEN ArgPromotion : public CallGraphSCCPass {
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.addRequired<AliasAnalysis>();
-      AU.addRequired<TargetData>();
       CallGraphSCCPass::getAnalysisUsage(AU);
     }
 
-    virtual bool runOnSCC(const std::vector<CallGraphNode *> &SCC);
+    virtual bool runOnSCC(std::vector<CallGraphNode *> &SCC);
     static char ID; // Pass identification, replacement for typeid
     explicit ArgPromotion(unsigned maxElements = 3)
       : CallGraphSCCPass(&ID), maxElements(maxElements) {}
@@ -73,11 +74,11 @@ namespace {
     typedef std::vector<uint64_t> IndicesVector;
 
   private:
-    bool PromoteArguments(CallGraphNode *CGN);
+    CallGraphNode *PromoteArguments(CallGraphNode *CGN);
     bool isSafeToPromoteArgument(Argument *Arg, bool isByVal) const;
-    Function *DoPromotion(Function *F,
-                          SmallPtrSet<Argument*, 8> &ArgsToPromote,
-                          SmallPtrSet<Argument*, 8> &ByValArgsToTransform);
+    CallGraphNode *DoPromotion(Function *F,
+                               SmallPtrSet<Argument*, 8> &ArgsToPromote,
+                               SmallPtrSet<Argument*, 8> &ByValArgsToTransform);
     /// The maximum number of elements to expand, or 0 for unlimited.
     unsigned maxElements;
   };
@@ -91,14 +92,17 @@ Pass *llvm::createArgumentPromotionPass(unsigned maxElements) {
   return new ArgPromotion(maxElements);
 }
 
-bool ArgPromotion::runOnSCC(const std::vector<CallGraphNode *> &SCC) {
+bool ArgPromotion::runOnSCC(std::vector<CallGraphNode *> &SCC) {
   bool Changed = false, LocalChange;
 
   do {  // Iterate until we stop promoting from this SCC.
     LocalChange = false;
     // Attempt to promote arguments from all functions in this SCC.
     for (unsigned i = 0, e = SCC.size(); i != e; ++i)
-      LocalChange |= PromoteArguments(SCC[i]);
+      if (CallGraphNode *CGN = PromoteArguments(SCC[i])) {
+        LocalChange = true;
+        SCC[i] = CGN;
+      }
     Changed |= LocalChange;               // Remember that we changed something.
   } while (LocalChange);
 
@@ -110,11 +114,11 @@ bool ArgPromotion::runOnSCC(const std::vector<CallGraphNode *> &SCC) {
 /// example, all callers are direct).  If safe to promote some arguments, it
 /// calls the DoPromotion method.
 ///
-bool ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
+CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
   Function *F = CGN->getFunction();
 
   // Make sure that it is local to this module.
-  if (!F || !F->hasLocalLinkage()) return false;
+  if (!F || !F->hasLocalLinkage()) return 0;
 
   // First check: see if there are any pointer arguments!  If not, quick exit.
   SmallVector<std::pair<Argument*, unsigned>, 16> PointerArgs;
@@ -123,12 +127,12 @@ bool ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
        I != E; ++I, ++ArgNo)
     if (isa<PointerType>(I->getType()))
       PointerArgs.push_back(std::pair<Argument*, unsigned>(I, ArgNo));
-  if (PointerArgs.empty()) return false;
+  if (PointerArgs.empty()) return 0;
 
   // Second check: make sure that all callers are direct callers.  We can't
   // transform functions that have indirect callers.
   if (F->hasAddressTaken())
-    return false;
+    return 0;
 
   // Check to see which arguments are promotable.  If an argument is promotable,
   // add it to ArgsToPromote.
@@ -144,9 +148,9 @@ bool ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
       const Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
       if (const StructType *STy = dyn_cast<StructType>(AgTy)) {
         if (maxElements > 0 && STy->getNumElements() > maxElements) {
-          DOUT << "argpromotion disable promoting argument '"
-               << PtrArg->getName() << "' because it would require adding more "
-               << "than " << maxElements << " arguments to the function.\n";
+          DEBUG(errs() << "argpromotion disable promoting argument '"
+                << PtrArg->getName() << "' because it would require adding more"
+                << " than " << maxElements << " arguments to the function.\n");
         } else {
           // If all the elements are single-value types, we can promote it.
           bool AllSimple = true;
@@ -173,13 +177,10 @@ bool ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
   }
 
   // No promotable pointer arguments.
-  if (ArgsToPromote.empty() && ByValArgsToTransform.empty()) return false;
-
-  Function *NewF = DoPromotion(F, ArgsToPromote, ByValArgsToTransform);
+  if (ArgsToPromote.empty() && ByValArgsToTransform.empty()) 
+    return 0;
 
-  // Update the call graph to know that the function has been transformed.
-  getAnalysis<CallGraph>().changeFunction(F, NewF);
-  return true;
+  return DoPromotion(F, ArgsToPromote, ByValArgsToTransform);
 }
 
 /// IsAlwaysValidPointer - Return true if the specified pointer is always legal
@@ -409,9 +410,9 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
     // to do.
     if (ToPromote.find(Operands) == ToPromote.end()) {
       if (maxElements > 0 && ToPromote.size() == maxElements) {
-        DOUT << "argpromotion not promoting argument '"
-             << Arg->getName() << "' because it would require adding more "
-             << "than " << maxElements << " arguments to the function.\n";
+        DEBUG(errs() << "argpromotion not promoting argument '"
+              << Arg->getName() << "' because it would require adding more "
+              << "than " << maxElements << " arguments to the function.\n");
         // We limit aggregate promotion to only promoting up to a fixed number
         // of elements of the aggregate.
         return false;
@@ -432,7 +433,8 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
   SmallPtrSet<BasicBlock*, 16> TranspBlocks;
 
   AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
-  TargetData &TD = getAnalysis<TargetData>();
+  TargetData *TD = getAnalysisIfAvailable<TargetData>();
+  if (!TD) return false; // Without TargetData, assume the worst.
 
   for (unsigned i = 0, e = Loads.size(); i != e; ++i) {
     // Check to see if the load is invalidated from the start of the block to
@@ -442,7 +444,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
 
     const PointerType *LoadTy =
       cast<PointerType>(Load->getPointerOperand()->getType());
-    unsigned LoadSize = (unsigned)TD.getTypeStoreSize(LoadTy->getElementType());
+    unsigned LoadSize =(unsigned)TD->getTypeStoreSize(LoadTy->getElementType());
 
     if (AA.canInstructionRangeModify(BB->front(), *Load, Arg, LoadSize))
       return false;  // Pointer is invalidated!
@@ -467,8 +469,8 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
 /// DoPromotion - This method actually performs the promotion of the specified
 /// arguments, and returns the new function.  At this point, we know that it's
 /// safe to do so.
-Function *ArgPromotion::DoPromotion(Function *F,
-                                    SmallPtrSet<Argument*, 8> &ArgsToPromote,
+CallGraphNode *ArgPromotion::DoPromotion(Function *F,
+                               SmallPtrSet<Argument*, 8> &ArgsToPromote,
                               SmallPtrSet<Argument*, 8> &ByValArgsToTransform) {
 
   // Start by computing a new prototype for the function, which is the same as
@@ -581,19 +583,24 @@ Function *ArgPromotion::DoPromotion(Function *F,
   bool ExtraArgHack = false;
   if (Params.empty() && FTy->isVarArg()) {
     ExtraArgHack = true;
-    Params.push_back(Type::Int32Ty);
+    Params.push_back(Type::getInt32Ty(F->getContext()));
   }
 
   // Construct the new function type using the new arguments.
   FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg());
 
-  // Create the new function body and insert it into the module...
+  // Create the new function body and insert it into the module.
   Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName());
   NF->copyAttributesFrom(F);
 
+  
+  DEBUG(errs() << "ARG PROMOTION:  Promoting to:" << *NF << "\n"
+        << "From: " << *F);
+  
   // Recompute the parameter attributes list based on the new arguments for
   // the function.
-  NF->setAttributes(AttrListPtr::get(AttributesVec.begin(), AttributesVec.end()));
+  NF->setAttributes(AttrListPtr::get(AttributesVec.begin(),
+                                     AttributesVec.end()));
   AttributesVec.clear();
 
   F->getParent()->getFunctionList().insert(F, NF);
@@ -606,6 +613,10 @@ Function *ArgPromotion::DoPromotion(Function *F,
   // Get the callgraph information that we need to update to reflect our
   // changes.
   CallGraph &CG = getAnalysis<CallGraph>();
+  
+  // Get a new callgraph node for NF.
+  CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF);
+  
 
   // Loop over all of the callers of the function, transforming the call sites
   // to pass in the loaded pointers.
@@ -636,9 +647,10 @@ Function *ArgPromotion::DoPromotion(Function *F,
         // Emit a GEP and load for each element of the struct.
         const Type *AgTy = cast<PointerType>(I->getType())->getElementType();
         const StructType *STy = cast<StructType>(AgTy);
-        Value *Idxs[2] = { ConstantInt::get(Type::Int32Ty, 0), 0 };
+        Value *Idxs[2] = {
+              ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 };
         for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
-          Idxs[1] = ConstantInt::get(Type::Int32Ty, i);
+          Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
           Value *Idx = GetElementPtrInst::Create(*AI, Idxs, Idxs+2,
                                                  (*AI)->getName()+"."+utostr(i),
                                                  Call);
@@ -662,7 +674,9 @@ Function *ArgPromotion::DoPromotion(Function *F,
                  IE = SI->end(); II != IE; ++II) {
               // Use i32 to index structs, and i64 for others (pointers/arrays).
               // This satisfies GEP constraints.
-              const Type *IdxTy = (isa<StructType>(ElTy) ? Type::Int32Ty : Type::Int64Ty);
+              const Type *IdxTy = (isa<StructType>(ElTy) ?
+                    Type::getInt32Ty(F->getContext()) : 
+                    Type::getInt64Ty(F->getContext()));
               Ops.push_back(ConstantInt::get(IdxTy, *II));
               // Keep track of the type we're currently indexing
               ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(*II);
@@ -679,7 +693,7 @@ Function *ArgPromotion::DoPromotion(Function *F,
       }
 
     if (ExtraArgHack)
-      Args.push_back(Constant::getNullValue(Type::Int32Ty));
+      Args.push_back(Constant::getNullValue(Type::getInt32Ty(F->getContext())));
 
     // Push any varargs arguments on the list
     for (; AI != CS.arg_end(); ++AI, ++ArgIndex) {
@@ -715,7 +729,8 @@ Function *ArgPromotion::DoPromotion(Function *F,
     AA.replaceWithNewValue(Call, New);
 
     // Update the callgraph to know that the callsite has been transformed.
-    CG[Call->getParent()->getParent()]->replaceCallSite(Call, New);
+    CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()];
+    CalleeNode->replaceCallEdge(Call, New, NF_CGN);
 
     if (!Call->use_empty()) {
       Call->replaceAllUsesWith(New);
@@ -756,14 +771,16 @@ Function *ArgPromotion::DoPromotion(Function *F,
       const Type *AgTy = cast<PointerType>(I->getType())->getElementType();
       Value *TheAlloca = new AllocaInst(AgTy, 0, "", InsertPt);
       const StructType *STy = cast<StructType>(AgTy);
-      Value *Idxs[2] = { ConstantInt::get(Type::Int32Ty, 0), 0 };
+      Value *Idxs[2] = {
+            ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 };
 
       for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
-        Idxs[1] = ConstantInt::get(Type::Int32Ty, i);
-        std::string Name = TheAlloca->getName()+"."+utostr(i);
-        Value *Idx = GetElementPtrInst::Create(TheAlloca, Idxs, Idxs+2,
-                                               Name, InsertPt);
-        I2->setName(I->getName()+"."+utostr(i));
+        Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
+        Value *Idx = 
+          GetElementPtrInst::Create(TheAlloca, Idxs, Idxs+2,
+                                    TheAlloca->getName()+"."+Twine(i), 
+                                    InsertPt);
+        I2->setName(I->getName()+"."+Twine(i));
         new StoreInst(I2++, Idx, InsertPt);
       }
 
@@ -792,8 +809,8 @@ Function *ArgPromotion::DoPromotion(Function *F,
         LI->replaceAllUsesWith(I2);
         AA.replaceWithNewValue(LI, I2);
         LI->eraseFromParent();
-        DOUT << "*** Promoted load of argument '" << I->getName()
-             << "' in function '" << F->getName() << "'\n";
+        DEBUG(errs() << "*** Promoted load of argument '" << I->getName()
+              << "' in function '" << F->getName() << "'\n");
       } else {
         GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->use_back());
         IndicesVector Operands;
@@ -819,8 +836,8 @@ Function *ArgPromotion::DoPromotion(Function *F,
         NewName += ".val";
         TheArg->setName(NewName);
 
-        DOUT << "*** Promoted agg argument '" << TheArg->getName()
-             << "' of function '" << NF->getName() << "'\n";
+        DEBUG(errs() << "*** Promoted agg argument '" << TheArg->getName()
+              << "' of function '" << NF->getName() << "'\n");
 
         // All of the uses must be load instructions.  Replace them all with
         // the argument specified by ArgNo.
@@ -842,13 +859,18 @@ Function *ArgPromotion::DoPromotion(Function *F,
 
   // Notify the alias analysis implementation that we inserted a new argument.
   if (ExtraArgHack)
-    AA.copyValue(Constant::getNullValue(Type::Int32Ty), NF->arg_begin());
+    AA.copyValue(Constant::getNullValue(Type::getInt32Ty(F->getContext())), 
+                 NF->arg_begin());
 
 
   // Tell the alias analysis that the old function is about to disappear.
   AA.replaceWithNewValue(F, NF);
 
+  
+  NF_CGN->stealCalledFunctionsFrom(CG[F]);
+  
   // Now that the old function is dead, delete it.
-  F->eraseFromParent();
-  return NF;
+  delete CG.removeFunctionFromModule(F);
+  
+  return NF_CGN;
 }
diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt
index 1438b48..ec0f1e1 100644
--- a/lib/Transforms/IPO/CMakeLists.txt
+++ b/lib/Transforms/IPO/CMakeLists.txt
@@ -1,18 +1,19 @@
 add_llvm_library(LLVMipo
-  FunctionAttrs.cpp
   ArgumentPromotion.cpp
   ConstantMerge.cpp
   DeadArgumentElimination.cpp
   DeadTypeElimination.cpp
   ExtractGV.cpp
+  FunctionAttrs.cpp
   GlobalDCE.cpp
   GlobalOpt.cpp
+  IPConstantPropagation.cpp
+  IPO.cpp
   IndMemRemoval.cpp
   InlineAlways.cpp
-  Inliner.cpp
   InlineSimple.cpp
+  Inliner.cpp
   Internalize.cpp
-  IPConstantPropagation.cpp
   LoopExtractor.cpp
   LowerSetJmp.cpp
   MergeFunctions.cpp
diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp
index 237e6db..c1a1045 100644
--- a/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/lib/Transforms/IPO/ConstantMerge.cpp
@@ -78,7 +78,7 @@ bool ConstantMerge::runOnModule(Module &M) {
       }
       
       // Only process constants with initializers.
-      if (GV->isConstant() && GV->hasInitializer()) {
+      if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
         Constant *Init = GV->getInitializer();
 
         // Check to see if the initializer is already known.
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index e480dad..79a32f0 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -24,10 +24,12 @@
 #include "llvm/DerivedTypes.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringExtras.h"
@@ -72,7 +74,7 @@ namespace {
 
       std::string getDescription() const {
         return std::string((IsArg ? "Argument #" : "Return value #")) 
-               + utostr(Idx) + " of function " + F->getName();
+               + utostr(Idx) + " of function " + F->getNameStr();
       }
     };
 
@@ -195,8 +197,10 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
   // Start by computing a new prototype for the function, which is the same as
   // the old function, but doesn't have isVarArg set.
   const FunctionType *FTy = Fn.getFunctionType();
+  
   std::vector<const Type*> Params(FTy->param_begin(), FTy->param_end());
-  FunctionType *NFTy = FunctionType::get(FTy->getReturnType(), Params, false);
+  FunctionType *NFTy = FunctionType::get(FTy->getReturnType(),
+                                                Params, false);
   unsigned NumArgs = Params.size();
 
   // Create the new function body and insert it into the module...
@@ -277,7 +281,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
 /// for void functions and 1 for functions not returning a struct. It returns
 /// the number of struct elements for functions returning a struct.
 static unsigned NumRetVals(const Function *F) {
-  if (F->getReturnType() == Type::VoidTy)
+  if (F->getReturnType() == Type::getVoidTy(F->getContext()))
     return 0;
   else if (const StructType *STy = dyn_cast<StructType>(F->getReturnType()))
     return STy->getNumElements();
@@ -422,7 +426,7 @@ void DAE::SurveyFunction(Function &F) {
     return;
   }
 
-  DOUT << "DAE - Inspecting callers for fn: " << F.getName() << "\n";
+  DEBUG(errs() << "DAE - Inspecting callers for fn: " << F.getName() << "\n");
   // Keep track of the number of live retvals, so we can skip checks once all
   // of them turn out to be live.
   unsigned NumLiveRetVals = 0;
@@ -485,7 +489,7 @@ void DAE::SurveyFunction(Function &F) {
   for (unsigned i = 0; i != RetCount; ++i)
     MarkValue(CreateRet(&F, i), RetValLiveness[i], MaybeLiveRetUses[i]);
 
-  DOUT << "DAE - Inspecting args for fn: " << F.getName() << "\n";
+  DEBUG(errs() << "DAE - Inspecting args for fn: " << F.getName() << "\n");
 
   // Now, check all of our arguments.
   unsigned i = 0;
@@ -527,7 +531,7 @@ void DAE::MarkValue(const RetOrArg &RA, Liveness L,
 /// mark any values that are used as this function's parameters or by its return
 /// values (according to Uses) live as well.
 void DAE::MarkLive(const Function &F) {
-    DOUT << "DAE - Intrinsically live fn: " << F.getName() << "\n";
+  DEBUG(errs() << "DAE - Intrinsically live fn: " << F.getName() << "\n");
     // Mark the function as live.
     LiveFunctions.insert(&F);
     // Mark all arguments as live.
@@ -548,7 +552,7 @@ void DAE::MarkLive(const RetOrArg &RA) {
   if (!LiveValues.insert(RA).second)
     return; // We were already marked Live.
 
-  DOUT << "DAE - Marking " << RA.getDescription() << " live\n";
+  DEBUG(errs() << "DAE - Marking " << RA.getDescription() << " live\n");
   PropagateLiveness(RA);
 }
 
@@ -596,11 +600,12 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
   const Type *RetTy = FTy->getReturnType();
   const Type *NRetTy = NULL;
   unsigned RetCount = NumRetVals(F);
+  
   // -1 means unused, other numbers are the new index
   SmallVector<int, 5> NewRetIdxs(RetCount, -1);
   std::vector<const Type*> RetTypes;
-  if (RetTy == Type::VoidTy) {
-    NRetTy = Type::VoidTy;
+  if (RetTy == Type::getVoidTy(F->getContext())) {
+    NRetTy = Type::getVoidTy(F->getContext());
   } else {
     const StructType *STy = dyn_cast<StructType>(RetTy);
     if (STy)
@@ -612,8 +617,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
           NewRetIdxs[i] = RetTypes.size() - 1;
         } else {
           ++NumRetValsEliminated;
-          DOUT << "DAE - Removing return value " << i << " from "
-               << F->getNameStart() << "\n";
+          DEBUG(errs() << "DAE - Removing return value " << i << " from "
+                << F->getName() << "\n");
         }
       }
     else
@@ -622,8 +627,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
         RetTypes.push_back(RetTy);
         NewRetIdxs[0] = 0;
       } else {
-        DOUT << "DAE - Removing return value from " << F->getNameStart()
-             << "\n";
+        DEBUG(errs() << "DAE - Removing return value from " << F->getName()
+              << "\n");
         ++NumRetValsEliminated;
       }
     if (RetTypes.size() > 1)
@@ -633,14 +638,14 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
       // something and {} into void.
       // Make the new struct packed if we used to return a packed struct
       // already.
-      NRetTy = StructType::get(RetTypes, STy->isPacked());
+      NRetTy = StructType::get(STy->getContext(), RetTypes, STy->isPacked());
     else if (RetTypes.size() == 1)
       // One return type? Just a simple value then, but only if we didn't use to
       // return a struct with that simple value before.
       NRetTy = RetTypes.front();
     else if (RetTypes.size() == 0)
       // No return types? Make it void, but only if we didn't use to return {}.
-      NRetTy = Type::VoidTy;
+      NRetTy = Type::getVoidTy(F->getContext());
   }
 
   assert(NRetTy && "No new return type found?");
@@ -649,7 +654,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
   // values. Otherwise, ensure that we don't have any conflicting attributes
   // here. Currently, this should not be possible, but special handling might be
   // required when new return value attributes are added.
-  if (NRetTy == Type::VoidTy)
+  if (NRetTy == Type::getVoidTy(F->getContext()))
     RAttrs &= ~Attribute::typeIncompatible(NRetTy);
   else
     assert((RAttrs & Attribute::typeIncompatible(NRetTy)) == 0 
@@ -677,8 +682,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
         AttributesVec.push_back(AttributeWithIndex::get(Params.size(), Attrs));
     } else {
       ++NumArgumentsEliminated;
-      DOUT << "DAE - Removing argument " << i << " (" << I->getNameStart()
-           << ") from " << F->getNameStart() << "\n";
+      DEBUG(errs() << "DAE - Removing argument " << i << " (" << I->getName()
+            << ") from " << F->getName() << "\n");
     }
   }
 
@@ -697,11 +702,12 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
   bool ExtraArgHack = false;
   if (Params.empty() && FTy->isVarArg() && FTy->getNumParams() != 0) {
     ExtraArgHack = true;
-    Params.push_back(Type::Int32Ty);
+    Params.push_back(Type::getInt32Ty(F->getContext()));
   }
 
   // Create the new function type based on the recomputed parameters.
-  FunctionType *NFTy = FunctionType::get(NRetTy, Params, FTy->isVarArg());
+  FunctionType *NFTy = FunctionType::get(NRetTy, Params,
+                                                FTy->isVarArg());
 
   // No change?
   if (NFTy == FTy)
@@ -750,7 +756,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
       }
 
     if (ExtraArgHack)
-      Args.push_back(UndefValue::get(Type::Int32Ty));
+      Args.push_back(UndefValue::get(Type::getInt32Ty(F->getContext())));
 
     // Push any varargs arguments on the list. Don't forget their attributes.
     for (CallSite::arg_iterator E = CS.arg_end(); I != E; ++I, ++i) {
@@ -786,7 +792,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
         // Return type not changed? Just replace users then.
         Call->replaceAllUsesWith(New);
         New->takeName(Call);
-      } else if (New->getType() == Type::VoidTy) {
+      } else if (New->getType() == Type::getVoidTy(F->getContext())) {
         // Our return value has uses, but they will get removed later on.
         // Replace by null for now.
         Call->replaceAllUsesWith(Constant::getNullValue(Call->getType()));
@@ -806,7 +812,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
         // extract/insertvalue chaining and let instcombine clean that up.
         //
         // Start out building up our return value from undef
-        Value *RetVal = llvm::UndefValue::get(RetTy);
+        Value *RetVal = UndefValue::get(RetTy);
         for (unsigned i = 0; i != RetCount; ++i)
           if (NewRetIdxs[i] != -1) {
             Value *V;
@@ -862,7 +868,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
       if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
         Value *RetVal;
 
-        if (NFTy->getReturnType() == Type::VoidTy) {
+        if (NFTy->getReturnType() == Type::getVoidTy(F->getContext())) {
           RetVal = 0;
         } else {
           assert (isa<StructType>(RetTy));
@@ -873,7 +879,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
           // clean that up.
           Value *OldRet = RI->getOperand(0);
           // Start out building up our return value from undef
-          RetVal = llvm::UndefValue::get(NRetTy);
+          RetVal = UndefValue::get(NRetTy);
           for (unsigned i = 0; i != RetCount; ++i)
             if (NewRetIdxs[i] != -1) {
               ExtractValueInst *EV = ExtractValueInst::Create(OldRet, i,
@@ -893,7 +899,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
         }
         // Replace the return instruction with one returning the new return
         // value (possibly 0 if we became void).
-        ReturnInst::Create(RetVal, RI);
+        ReturnInst::Create(F->getContext(), RetVal, RI);
         BB->getInstList().erase(RI);
       }
 
@@ -910,7 +916,7 @@ bool DAE::runOnModule(Module &M) {
   // removed.  We can do this if they never call va_start.  This loop cannot be
   // fused with the next loop, because deleting a function invalidates
   // information computed while surveying other functions.
-  DOUT << "DAE - Deleting dead varargs\n";
+  DEBUG(errs() << "DAE - Deleting dead varargs\n");
   for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
     Function &F = *I++;
     if (F.getFunctionType()->isVarArg())
@@ -921,7 +927,7 @@ bool DAE::runOnModule(Module &M) {
   // We assume all arguments are dead unless proven otherwise (allowing us to
   // determine that dead arguments passed into recursive functions are dead).
   //
-  DOUT << "DAE - Determining liveness\n";
+  DEBUG(errs() << "DAE - Determining liveness\n");
   for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
     SurveyFunction(*I);
   
diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp
index 0c529d2..191100c 100644
--- a/lib/Transforms/IPO/ExtractGV.cpp
+++ b/lib/Transforms/IPO/ExtractGV.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/Constants.h"
@@ -43,6 +44,7 @@ namespace {
         return false;  // Nothing to extract
       }
       
+      
       if (deleteStuff)
         return deleteGV();
       M.setModuleInlineAsm("");
@@ -99,7 +101,8 @@ namespace {
       // by putting them in the used array
       {
         std::vector<Constant *> AUGs;
-        const Type *SBP= PointerType::getUnqual(Type::Int8Ty);
+        const Type *SBP=
+              Type::getInt8PtrTy(M.getContext());
         for (std::vector<GlobalValue*>::iterator GI = Named.begin(), 
                GE = Named.end(); GI != GE; ++GI) {
           (*GI)->setLinkage(GlobalValue::ExternalLinkage);
@@ -107,9 +110,9 @@ namespace {
         }
         ArrayType *AT = ArrayType::get(SBP, AUGs.size());
         Constant *Init = ConstantArray::get(AT, AUGs);
-        GlobalValue *gv = new GlobalVariable(AT, false, 
+        GlobalValue *gv = new GlobalVariable(M, AT, false, 
                                              GlobalValue::AppendingLinkage, 
-                                             Init, "llvm.used", &M);
+                                             Init, "llvm.used");
         gv->setSection("llvm.metadata");
       }
 
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index e831524..7edaa7f 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -26,6 +26,7 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/MallocHelper.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/UniqueVector.h"
@@ -44,7 +45,7 @@ namespace {
     FunctionAttrs() : CallGraphSCCPass(&ID) {}
 
     // runOnSCC - Analyze the SCC, performing the transformation if possible.
-    bool runOnSCC(const std::vector<CallGraphNode *> &SCC);
+    bool runOnSCC(std::vector<CallGraphNode *> &SCC);
 
     // AddReadAttrs - Deduce readonly/readnone attributes for the SCC.
     bool AddReadAttrs(const std::vector<CallGraphNode *> &SCC);
@@ -54,7 +55,7 @@ namespace {
 
     // IsFunctionMallocLike - Does this function allocate new memory?
     bool IsFunctionMallocLike(Function *F,
-                              SmallPtrSet<CallGraphNode*, 8> &) const;
+                              SmallPtrSet<Function*, 8> &) const;
 
     // AddNoAliasAttrs - Deduce noalias attributes for the SCC.
     bool AddNoAliasAttrs(const std::vector<CallGraphNode *> &SCC);
@@ -93,13 +94,12 @@ bool FunctionAttrs::PointsToLocalMemory(Value *V) {
 
 /// AddReadAttrs - Deduce readonly/readnone attributes for the SCC.
 bool FunctionAttrs::AddReadAttrs(const std::vector<CallGraphNode *> &SCC) {
-  SmallPtrSet<CallGraphNode*, 8> SCCNodes;
-  CallGraph &CG = getAnalysis<CallGraph>();
+  SmallPtrSet<Function*, 8> SCCNodes;
 
   // Fill SCCNodes with the elements of the SCC.  Used for quickly
   // looking up whether a given CallGraphNode is in this SCC.
   for (unsigned i = 0, e = SCC.size(); i != e; ++i)
-    SCCNodes.insert(SCC[i]);
+    SCCNodes.insert(SCC[i]->getFunction());
 
   // Check if any of the functions in the SCC read or write memory.  If they
   // write memory then they can't be marked readnone or readonly.
@@ -133,9 +133,9 @@ bool FunctionAttrs::AddReadAttrs(const std::vector<CallGraphNode *> &SCC) {
       // Some instructions can be ignored even if they read or write memory.
       // Detect these now, skipping to the next instruction if one is found.
       CallSite CS = CallSite::get(I);
-      if (CS.getInstruction()) {
+      if (CS.getInstruction() && CS.getCalledFunction()) {
         // Ignore calls to functions in the same SCC.
-        if (SCCNodes.count(CG[CS.getCalledFunction()]))
+        if (SCCNodes.count(CS.getCalledFunction()))
           continue;
       } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
         // Ignore loads from local memory.
@@ -154,7 +154,7 @@ bool FunctionAttrs::AddReadAttrs(const std::vector<CallGraphNode *> &SCC) {
         return false;
 
       if (isa<MallocInst>(I))
-        // MallocInst claims not to write memory!  PR3754.
+        // malloc claims not to write memory!  PR3754.
         return false;
 
       // If this instruction may read memory, remember that.
@@ -226,9 +226,7 @@ bool FunctionAttrs::AddNoCaptureAttrs(const std::vector<CallGraphNode *> &SCC) {
 /// IsFunctionMallocLike - A function is malloc-like if it returns either null
 /// or a pointer that doesn't alias any other pointer visible to the caller.
 bool FunctionAttrs::IsFunctionMallocLike(Function *F,
-                              SmallPtrSet<CallGraphNode*, 8> &SCCNodes) const {
-  CallGraph &CG = getAnalysis<CallGraph>();
-
+                              SmallPtrSet<Function*, 8> &SCCNodes) const {
   UniqueVector<Value *> FlowsToReturn;
   for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I)
     if (ReturnInst *Ret = dyn_cast<ReturnInst>(I->getTerminator()))
@@ -250,32 +248,36 @@ bool FunctionAttrs::IsFunctionMallocLike(Function *F,
     if (Instruction *RVI = dyn_cast<Instruction>(RetVal))
       switch (RVI->getOpcode()) {
         // Extend the analysis by looking upwards.
-        case Instruction::GetElementPtr:
         case Instruction::BitCast:
+        case Instruction::GetElementPtr:
           FlowsToReturn.insert(RVI->getOperand(0));
           continue;
         case Instruction::Select: {
           SelectInst *SI = cast<SelectInst>(RVI);
           FlowsToReturn.insert(SI->getTrueValue());
           FlowsToReturn.insert(SI->getFalseValue());
-        } continue;
+          continue;
+        }
         case Instruction::PHI: {
           PHINode *PN = cast<PHINode>(RVI);
           for (int i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
             FlowsToReturn.insert(PN->getIncomingValue(i));
-        } continue;
+          continue;
+        }
 
         // Check whether the pointer came from an allocation.
         case Instruction::Alloca:
         case Instruction::Malloc:
           break;
         case Instruction::Call:
+          if (isMalloc(RVI))
+            break;
         case Instruction::Invoke: {
           CallSite CS(RVI);
           if (CS.paramHasAttr(0, Attribute::NoAlias))
             break;
           if (CS.getCalledFunction() &&
-              SCCNodes.count(CG[CS.getCalledFunction()]))
+              SCCNodes.count(CS.getCalledFunction()))
             break;
         } // fall-through
         default:
@@ -291,12 +293,12 @@ bool FunctionAttrs::IsFunctionMallocLike(Function *F,
 
 /// AddNoAliasAttrs - Deduce noalias attributes for the SCC.
 bool FunctionAttrs::AddNoAliasAttrs(const std::vector<CallGraphNode *> &SCC) {
-  SmallPtrSet<CallGraphNode*, 8> SCCNodes;
+  SmallPtrSet<Function*, 8> SCCNodes;
 
   // Fill SCCNodes with the elements of the SCC.  Used for quickly
   // looking up whether a given CallGraphNode is in this SCC.
   for (unsigned i = 0, e = SCC.size(); i != e; ++i)
-    SCCNodes.insert(SCC[i]);
+    SCCNodes.insert(SCC[i]->getFunction());
 
   // Check each function in turn, determining which functions return noalias
   // pointers.
@@ -339,7 +341,7 @@ bool FunctionAttrs::AddNoAliasAttrs(const std::vector<CallGraphNode *> &SCC) {
   return MadeChange;
 }
 
-bool FunctionAttrs::runOnSCC(const std::vector<CallGraphNode *> &SCC) {
+bool FunctionAttrs::runOnSCC(std::vector<CallGraphNode *> &SCC) {
   bool Changed = AddReadAttrs(SCC);
   Changed |= AddNoCaptureAttrs(SCC);
   Changed |= AddNoAliasAttrs(SCC);
diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp
index 9c652b9..09f9e7c 100644
--- a/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/lib/Transforms/IPO/GlobalDCE.cpp
@@ -58,6 +58,7 @@ ModulePass *llvm::createGlobalDCEPass() { return new GlobalDCE(); }
 
 bool GlobalDCE::runOnModule(Module &M) {
   bool Changed = false;
+  
   // Loop over the module, adding globals which are obviously necessary.
   for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
     Changed |= RemoveUnusedGlobalValue(*I);
@@ -147,6 +148,9 @@ bool GlobalDCE::runOnModule(Module &M) {
 
   // Make sure that all memory is released
   AliveGlobals.clear();
+
+  // Remove dead metadata.
+  Changed |= M.getContext().RemoveDeadMetadata();
   return Changed;
 }
 
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 7fe097c..a44386e 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -20,20 +20,23 @@
 #include "llvm/DerivedTypes.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/MallocHelper.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/STLExtras.h"
 #include <algorithm>
 using namespace llvm;
@@ -56,7 +59,6 @@ STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated");
 namespace {
   struct VISIBILITY_HIDDEN GlobalOpt : public ModulePass {
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.addRequired<TargetData>();
     }
     static char ID; // Pass identification, replacement for typeid
     GlobalOpt() : ModulePass(&ID) {}
@@ -244,7 +246,8 @@ static bool AnalyzeGlobal(Value *V, GlobalStatus &GS,
   return false;
 }
 
-static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx) {
+static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx,
+                                             LLVMContext &Context) {
   ConstantInt *CI = dyn_cast<ConstantInt>(Idx);
   if (!CI) return 0;
   unsigned IdxV = CI->getZExtValue();
@@ -280,7 +283,8 @@ static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx) {
 /// users of the global, cleaning up the obvious ones.  This is largely just a
 /// quick scan over the use list to clean up the easy and obvious cruft.  This
 /// returns true if it made a change.
-static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) {
+static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
+                                       LLVMContext &Context) {
   bool Changed = false;
   for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;) {
     User *U = *UI++;
@@ -301,11 +305,11 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) {
         Constant *SubInit = 0;
         if (Init)
           SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
-        Changed |= CleanupConstantGlobalUsers(CE, SubInit);
+        Changed |= CleanupConstantGlobalUsers(CE, SubInit, Context);
       } else if (CE->getOpcode() == Instruction::BitCast && 
                  isa<PointerType>(CE->getType())) {
         // Pointer cast, delete any stores and memsets to the global.
-        Changed |= CleanupConstantGlobalUsers(CE, 0);
+        Changed |= CleanupConstantGlobalUsers(CE, 0, Context);
       }
 
       if (CE->use_empty()) {
@@ -319,11 +323,11 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) {
       Constant *SubInit = 0;
       if (!isa<ConstantExpr>(GEP->getOperand(0))) {
         ConstantExpr *CE = 
-          dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP));
+          dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP, Context));
         if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
           SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
       }
-      Changed |= CleanupConstantGlobalUsers(GEP, SubInit);
+      Changed |= CleanupConstantGlobalUsers(GEP, SubInit, Context);
 
       if (GEP->use_empty()) {
         GEP->eraseFromParent();
@@ -341,7 +345,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) {
       if (SafeToDestroyConstant(C)) {
         C->destroyConstant();
         // This could have invalidated UI, start over from scratch.
-        CleanupConstantGlobalUsers(V, Init);
+        CleanupConstantGlobalUsers(V, Init, Context);
         return true;
       }
     }
@@ -423,13 +427,18 @@ static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
     // Scalar replacing *just* the outer index of the array is probably not
     // going to be a win anyway, so just give up.
     for (++GEPI; // Skip array index.
-         GEPI != E && (isa<ArrayType>(*GEPI) || isa<VectorType>(*GEPI));
+         GEPI != E;
          ++GEPI) {
       uint64_t NumElements;
       if (const ArrayType *SubArrayTy = dyn_cast<ArrayType>(*GEPI))
         NumElements = SubArrayTy->getNumElements();
-      else
-        NumElements = cast<VectorType>(*GEPI)->getNumElements();
+      else if (const VectorType *SubVectorTy = dyn_cast<VectorType>(*GEPI))
+        NumElements = SubVectorTy->getNumElements();
+      else {
+        assert(isa<StructType>(*GEPI) &&
+               "Indexed GEP type is not array, vector, or struct!");
+        continue;
+      }
       
       ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand());
       if (!IdxVal || IdxVal->getZExtValue() >= NumElements)
@@ -461,7 +470,8 @@ static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
 /// behavior of the program in a more fine-grained way.  We have determined that
 /// this transformation is safe already.  We return the first global variable we
 /// insert so that the caller can reprocess it.
-static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
+static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD,
+                                 LLVMContext &Context) {
   // Make sure this global only has simple uses that we can SRA.
   if (!GlobalUsersSafeToSRA(GV))
     return 0;
@@ -483,14 +493,15 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
     const StructLayout &Layout = *TD.getStructLayout(STy);
     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
       Constant *In = getAggregateConstantElement(Init,
-                                            ConstantInt::get(Type::Int32Ty, i));
+                                ConstantInt::get(Type::getInt32Ty(Context), i),
+                                    Context);
       assert(In && "Couldn't get element of initializer?");
-      GlobalVariable *NGV = new GlobalVariable(STy->getElementType(i), false,
+      GlobalVariable *NGV = new GlobalVariable(Context,
+                                               STy->getElementType(i), false,
                                                GlobalVariable::InternalLinkage,
-                                               In, GV->getName()+"."+utostr(i),
-                                               (Module *)NULL,
+                                               In, GV->getName()+"."+Twine(i),
                                                GV->isThreadLocal(),
-                                               GV->getType()->getAddressSpace());
+                                              GV->getType()->getAddressSpace());
       Globals.insert(GV, NGV);
       NewGlobals.push_back(NGV);
       
@@ -517,15 +528,16 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
     unsigned EltAlign = TD.getABITypeAlignment(STy->getElementType());
     for (unsigned i = 0, e = NumElements; i != e; ++i) {
       Constant *In = getAggregateConstantElement(Init,
-                                            ConstantInt::get(Type::Int32Ty, i));
+                                ConstantInt::get(Type::getInt32Ty(Context), i),
+                                    Context);
       assert(In && "Couldn't get element of initializer?");
 
-      GlobalVariable *NGV = new GlobalVariable(STy->getElementType(), false,
+      GlobalVariable *NGV = new GlobalVariable(Context,
+                                               STy->getElementType(), false,
                                                GlobalVariable::InternalLinkage,
-                                               In, GV->getName()+"."+utostr(i),
-                                               (Module *)NULL,
+                                               In, GV->getName()+"."+Twine(i),
                                                GV->isThreadLocal(),
-                                               GV->getType()->getAddressSpace());
+                                              GV->getType()->getAddressSpace());
       Globals.insert(GV, NGV);
       NewGlobals.push_back(NGV);
       
@@ -541,9 +553,9 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
   if (NewGlobals.empty())
     return 0;
 
-  DOUT << "PERFORMING GLOBAL SRA ON: " << *GV;
+  DEBUG(errs() << "PERFORMING GLOBAL SRA ON: " << *GV);
 
-  Constant *NullInt = Constant::getNullValue(Type::Int32Ty);
+  Constant *NullInt = Constant::getNullValue(Type::getInt32Ty(Context));
 
   // Loop over all of the uses of the global, replacing the constantexpr geps,
   // with smaller constantexpr geps or direct references.
@@ -577,7 +589,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
         for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i)
           Idxs.push_back(GEPI->getOperand(i));
         NewPtr = GetElementPtrInst::Create(NewPtr, Idxs.begin(), Idxs.end(),
-                                           GEPI->getName()+"."+utostr(Val), GEPI);
+                                           GEPI->getName()+"."+Twine(Val),GEPI);
       }
     }
     GEP->replaceAllUsesWith(NewPtr);
@@ -667,7 +679,8 @@ static bool AllUsesOfLoadedValueWillTrapIfNull(GlobalVariable *GV) {
   return true;
 }
 
-static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
+static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV,
+                                           LLVMContext &Context) {
   bool Changed = false;
   for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) {
     Instruction *I = cast<Instruction>(*UI++);
@@ -700,7 +713,7 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
     } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
       Changed |= OptimizeAwayTrappingUsesOfValue(CI,
                                 ConstantExpr::getCast(CI->getOpcode(),
-                                                      NewV, CI->getType()));
+                                                NewV, CI->getType()), Context);
       if (CI->use_empty()) {
         Changed = true;
         CI->eraseFromParent();
@@ -717,8 +730,8 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
           break;
       if (Idxs.size() == GEPI->getNumOperands()-1)
         Changed |= OptimizeAwayTrappingUsesOfValue(GEPI,
-                                ConstantExpr::getGetElementPtr(NewV, &Idxs[0],
-                                                               Idxs.size()));
+                          ConstantExpr::getGetElementPtr(NewV, &Idxs[0],
+                                                        Idxs.size()), Context);
       if (GEPI->use_empty()) {
         Changed = true;
         GEPI->eraseFromParent();
@@ -734,7 +747,8 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
 /// value stored into it.  If there are uses of the loaded value that would trap
 /// if the loaded value is dynamically null, then we know that they cannot be
 /// reachable with a null optimize away the load.
-static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) {
+static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
+                                            LLVMContext &Context) {
   bool Changed = false;
 
   // Keep track of whether we are able to remove all the uses of the global
@@ -745,7 +759,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) {
   for (Value::use_iterator GUI = GV->use_begin(), E = GV->use_end(); GUI != E;){
     User *GlobalUser = *GUI++;
     if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) {
-      Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV);
+      Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV, Context);
       // If we were able to delete all uses of the loads
       if (LI->use_empty()) {
         LI->eraseFromParent();
@@ -768,15 +782,15 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) {
   }
 
   if (Changed) {
-    DOUT << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV;
+    DEBUG(errs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV);
     ++NumGlobUses;
   }
 
   // If we nuked all of the loads, then none of the stores are needed either,
   // nor is the global.
   if (AllNonStoreUsesGone) {
-    DOUT << "  *** GLOBAL NOW DEAD!\n";
-    CleanupConstantGlobalUsers(GV, 0);
+    DEBUG(errs() << "  *** GLOBAL NOW DEAD!\n");
+    CleanupConstantGlobalUsers(GV, 0, Context);
     if (GV->use_empty()) {
       GV->eraseFromParent();
       ++NumDeleted;
@@ -788,10 +802,10 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) {
 
 /// ConstantPropUsersOf - Walk the use list of V, constant folding all of the
 /// instructions that are foldable.
-static void ConstantPropUsersOf(Value *V) {
+static void ConstantPropUsersOf(Value *V, LLVMContext &Context) {
   for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; )
     if (Instruction *I = dyn_cast<Instruction>(*UI++))
-      if (Constant *NewC = ConstantFoldInstruction(I)) {
+      if (Constant *NewC = ConstantFoldInstruction(I, Context)) {
         I->replaceAllUsesWith(NewC);
 
         // Advance UI to the next non-I use to avoid invalidating it!
@@ -808,8 +822,9 @@ static void ConstantPropUsersOf(Value *V) {
 /// malloc, there is no reason to actually DO the malloc.  Instead, turn the
 /// malloc into a global, and any loads of GV as uses of the new global.
 static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
-                                                     MallocInst *MI) {
-  DOUT << "PROMOTING MALLOC GLOBAL: " << *GV << "  MALLOC = " << *MI;
+                                                     MallocInst *MI,
+                                                     LLVMContext &Context) {
+  DEBUG(errs() << "PROMOTING MALLOC GLOBAL: " << *GV << "  MALLOC = " << *MI);
   ConstantInt *NElements = cast<ConstantInt>(MI->getArraySize());
 
   if (NElements->getZExtValue() != 1) {
@@ -818,10 +833,10 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
     Type *NewTy = ArrayType::get(MI->getAllocatedType(),
                                  NElements->getZExtValue());
     MallocInst *NewMI =
-      new MallocInst(NewTy, Constant::getNullValue(Type::Int32Ty),
+      new MallocInst(NewTy, Constant::getNullValue(Type::getInt32Ty(Context)),
                      MI->getAlignment(), MI->getName(), MI);
     Value* Indices[2];
-    Indices[0] = Indices[1] = Constant::getNullValue(Type::Int32Ty);
+    Indices[0] = Indices[1] = Constant::getNullValue(Type::getInt32Ty(Context));
     Value *NewGEP = GetElementPtrInst::Create(NewMI, Indices, Indices + 2,
                                               NewMI->getName()+".el0", MI);
     MI->replaceAllUsesWith(NewGEP);
@@ -831,17 +846,17 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
 
   // Create the new global variable.  The contents of the malloc'd memory is
   // undefined, so initialize with an undef value.
+  // FIXME: This new global should have the alignment returned by malloc.  Code
+  // could depend on malloc returning large alignment (on the mac, 16 bytes) but
+  // this would only guarantee some lower alignment.
   Constant *Init = UndefValue::get(MI->getAllocatedType());
-  GlobalVariable *NewGV = new GlobalVariable(MI->getAllocatedType(), false,
+  GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(), 
+                                             MI->getAllocatedType(), false,
                                              GlobalValue::InternalLinkage, Init,
                                              GV->getName()+".body",
-                                             (Module *)NULL,
+                                             GV,
                                              GV->isThreadLocal());
-  // FIXME: This new global should have the alignment returned by malloc.  Code
-  // could depend on malloc returning large alignment (on the mac, 16 bytes) but
-  // this would only guarantee some lower alignment.
-  GV->getParent()->getGlobalList().insert(GV, NewGV);
-
+  
   // Anything that used the malloc now uses the global directly.
   MI->replaceAllUsesWith(NewGV);
 
@@ -853,9 +868,10 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
   // If there is a comparison against null, we will insert a global bool to
   // keep track of whether the global was initialized yet or not.
   GlobalVariable *InitBool =
-    new GlobalVariable(Type::Int1Ty, false, GlobalValue::InternalLinkage,
-                       ConstantInt::getFalse(), GV->getName()+".init",
-                       (Module *)NULL, GV->isThreadLocal());
+    new GlobalVariable(Context, Type::getInt1Ty(Context), false,
+                       GlobalValue::InternalLinkage,
+                       ConstantInt::getFalse(Context), GV->getName()+".init",
+                       GV->isThreadLocal());
   bool InitBoolUsed = false;
 
   // Loop over all uses of GV, processing them in turn.
@@ -872,10 +888,10 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
           Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", CI);
           InitBoolUsed = true;
           switch (CI->getPredicate()) {
-          default: assert(0 && "Unknown ICmp Predicate!");
+          default: llvm_unreachable("Unknown ICmp Predicate!");
           case ICmpInst::ICMP_ULT:
           case ICmpInst::ICMP_SLT:
-            LV = ConstantInt::getFalse();   // X < null -> always false
+            LV = ConstantInt::getFalse(Context);   // X < null -> always false
             break;
           case ICmpInst::ICMP_ULE:
           case ICmpInst::ICMP_SLE:
@@ -897,7 +913,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
     } else {
       StoreInst *SI = cast<StoreInst>(GV->use_back());
       // The global is initialized when the store to it occurs.
-      new StoreInst(ConstantInt::getTrue(), InitBool, SI);
+      new StoreInst(ConstantInt::getTrue(Context), InitBool, SI);
       SI->eraseFromParent();
     }
 
@@ -917,9 +933,141 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
   // To further other optimizations, loop over all users of NewGV and try to
   // constant prop them.  This will promote GEP instructions with constant
   // indices into GEP constant-exprs, which will allow global-opt to hack on it.
-  ConstantPropUsersOf(NewGV);
+  ConstantPropUsersOf(NewGV, Context);
   if (RepValue != NewGV)
-    ConstantPropUsersOf(RepValue);
+    ConstantPropUsersOf(RepValue, Context);
+
+  return NewGV;
+}
+
+/// OptimizeGlobalAddressOfMalloc - This function takes the specified global
+/// variable, and transforms the program as if it always contained the result of
+/// the specified malloc.  Because it is always the result of the specified
+/// malloc, there is no reason to actually DO the malloc.  Instead, turn the
+/// malloc into a global, and any loads of GV as uses of the new global.
+static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
+                                                     CallInst *CI,
+                                                     BitCastInst *BCI,
+                                                     LLVMContext &Context,
+                                                     TargetData* TD) {
+  const Type *IntPtrTy = TD->getIntPtrType(Context);
+  
+  DEBUG(errs() << "PROMOTING MALLOC GLOBAL: " << *GV << "  MALLOC = " << *CI);
+
+  ConstantInt *NElements = cast<ConstantInt>(getMallocArraySize(CI,
+                                                                Context, TD));
+  if (NElements->getZExtValue() != 1) {
+    // If we have an array allocation, transform it to a single element
+    // allocation to make the code below simpler.
+    Type *NewTy = ArrayType::get(getMallocAllocatedType(CI),
+                                 NElements->getZExtValue());
+    Value* NewM = CallInst::CreateMalloc(CI, IntPtrTy, NewTy);
+    Instruction* NewMI = cast<Instruction>(NewM);
+    Value* Indices[2];
+    Indices[0] = Indices[1] = Constant::getNullValue(IntPtrTy);
+    Value *NewGEP = GetElementPtrInst::Create(NewMI, Indices, Indices + 2,
+                                              NewMI->getName()+".el0", CI);
+    BCI->replaceAllUsesWith(NewGEP);
+    BCI->eraseFromParent();
+    CI->eraseFromParent();
+    BCI = cast<BitCastInst>(NewMI);
+    CI = extractMallocCallFromBitCast(NewMI);
+  }
+
+  // Create the new global variable.  The contents of the malloc'd memory is
+  // undefined, so initialize with an undef value.
+  // FIXME: This new global should have the alignment returned by malloc.  Code
+  // could depend on malloc returning large alignment (on the mac, 16 bytes) but
+  // this would only guarantee some lower alignment.
+  const Type *MAT = getMallocAllocatedType(CI);
+  Constant *Init = UndefValue::get(MAT);
+  GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(), 
+                                             MAT, false,
+                                             GlobalValue::InternalLinkage, Init,
+                                             GV->getName()+".body",
+                                             GV,
+                                             GV->isThreadLocal());
+  
+  // Anything that used the malloc now uses the global directly.
+  BCI->replaceAllUsesWith(NewGV);
+
+  Constant *RepValue = NewGV;
+  if (NewGV->getType() != GV->getType()->getElementType())
+    RepValue = ConstantExpr::getBitCast(RepValue, 
+                                        GV->getType()->getElementType());
+
+  // If there is a comparison against null, we will insert a global bool to
+  // keep track of whether the global was initialized yet or not.
+  GlobalVariable *InitBool =
+    new GlobalVariable(Context, Type::getInt1Ty(Context), false,
+                       GlobalValue::InternalLinkage,
+                       ConstantInt::getFalse(Context), GV->getName()+".init",
+                       GV->isThreadLocal());
+  bool InitBoolUsed = false;
+
+  // Loop over all uses of GV, processing them in turn.
+  std::vector<StoreInst*> Stores;
+  while (!GV->use_empty())
+    if (LoadInst *LI = dyn_cast<LoadInst>(GV->use_back())) {
+      while (!LI->use_empty()) {
+        Use &LoadUse = LI->use_begin().getUse();
+        if (!isa<ICmpInst>(LoadUse.getUser()))
+          LoadUse = RepValue;
+        else {
+          ICmpInst *ICI = cast<ICmpInst>(LoadUse.getUser());
+          // Replace the cmp X, 0 with a use of the bool value.
+          Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", ICI);
+          InitBoolUsed = true;
+          switch (ICI->getPredicate()) {
+          default: llvm_unreachable("Unknown ICmp Predicate!");
+          case ICmpInst::ICMP_ULT:
+          case ICmpInst::ICMP_SLT:
+            LV = ConstantInt::getFalse(Context);   // X < null -> always false
+            break;
+          case ICmpInst::ICMP_ULE:
+          case ICmpInst::ICMP_SLE:
+          case ICmpInst::ICMP_EQ:
+            LV = BinaryOperator::CreateNot(LV, "notinit", ICI);
+            break;
+          case ICmpInst::ICMP_NE:
+          case ICmpInst::ICMP_UGE:
+          case ICmpInst::ICMP_SGE:
+          case ICmpInst::ICMP_UGT:
+          case ICmpInst::ICMP_SGT:
+            break;  // no change.
+          }
+          ICI->replaceAllUsesWith(LV);
+          ICI->eraseFromParent();
+        }
+      }
+      LI->eraseFromParent();
+    } else {
+      StoreInst *SI = cast<StoreInst>(GV->use_back());
+      // The global is initialized when the store to it occurs.
+      new StoreInst(ConstantInt::getTrue(Context), InitBool, SI);
+      SI->eraseFromParent();
+    }
+
+  // If the initialization boolean was used, insert it, otherwise delete it.
+  if (!InitBoolUsed) {
+    while (!InitBool->use_empty())  // Delete initializations
+      cast<Instruction>(InitBool->use_back())->eraseFromParent();
+    delete InitBool;
+  } else
+    GV->getParent()->getGlobalList().insert(GV, InitBool);
+
+
+  // Now the GV is dead, nuke it and the malloc.
+  GV->eraseFromParent();
+  BCI->eraseFromParent();
+  CI->eraseFromParent();
+
+  // To further other optimizations, loop over all users of NewGV and try to
+  // constant prop them.  This will promote GEP instructions with constant
+  // indices into GEP constant-exprs, which will allow global-opt to hack on it.
+  ConstantPropUsersOf(NewGV, Context);
+  if (RepValue != NewGV)
+    ConstantPropUsersOf(RepValue, Context);
 
   return NewGV;
 }
@@ -1071,7 +1219,7 @@ static bool LoadUsesSimpleEnoughForHeapSRA(Value *V,
 /// AllGlobalLoadUsesSimpleEnoughForHeapSRA - If all users of values loaded from
 /// GV are simple enough to perform HeapSRA, return true.
 static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV,
-                                                    MallocInst *MI) {
+                                                    Instruction *StoredVal) {
   SmallPtrSet<PHINode*, 32> LoadUsingPHIs;
   SmallPtrSet<PHINode*, 32> LoadUsingPHIsPerLoad;
   for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E; 
@@ -1095,7 +1243,7 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV,
       Value *InVal = PN->getIncomingValue(op);
       
       // PHI of the stored value itself is ok.
-      if (InVal == MI) continue;
+      if (InVal == StoredVal) continue;
       
       if (PHINode *InPN = dyn_cast<PHINode>(InVal)) {
         // One of the PHIs in our set is (optimistically) ok.
@@ -1121,7 +1269,8 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV,
 
 static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
                DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
-                   std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
+                   std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite,
+                   LLVMContext &Context) {
   std::vector<Value*> &FieldVals = InsertedScalarizedValues[V];
   
   if (FieldNo >= FieldVals.size())
@@ -1139,19 +1288,20 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
     // a new Load of the scalarized global.
     Result = new LoadInst(GetHeapSROAValue(LI->getOperand(0), FieldNo,
                                            InsertedScalarizedValues,
-                                           PHIsToRewrite),
-                          LI->getName()+".f" + utostr(FieldNo), LI);
+                                           PHIsToRewrite, Context),
+                          LI->getName()+".f"+Twine(FieldNo), LI);
   } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
     // PN's type is pointer to struct.  Make a new PHI of pointer to struct
     // field.
     const StructType *ST = 
       cast<StructType>(cast<PointerType>(PN->getType())->getElementType());
     
-    Result =PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)),
-                            PN->getName()+".f"+utostr(FieldNo), PN);
+    Result =
+     PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)),
+                     PN->getName()+".f"+Twine(FieldNo), PN);
     PHIsToRewrite.push_back(std::make_pair(PN, FieldNo));
   } else {
-    assert(0 && "Unknown usable value");
+    llvm_unreachable("Unknown usable value");
     Result = 0;
   }
   
@@ -1162,18 +1312,20 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
 /// the load, rewrite the derived value to use the HeapSRoA'd load.
 static void RewriteHeapSROALoadUser(Instruction *LoadUser, 
              DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
-                   std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
+                   std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite,
+                   LLVMContext &Context) {
   // If this is a comparison against null, handle it.
   if (ICmpInst *SCI = dyn_cast<ICmpInst>(LoadUser)) {
     assert(isa<ConstantPointerNull>(SCI->getOperand(1)));
     // If we have a setcc of the loaded pointer, we can use a setcc of any
     // field.
     Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0,
-                                   InsertedScalarizedValues, PHIsToRewrite);
+                                   InsertedScalarizedValues, PHIsToRewrite,
+                                   Context);
     
-    Value *New = new ICmpInst(SCI->getPredicate(), NPtr,
-                              Constant::getNullValue(NPtr->getType()),
-                              SCI->getName(), SCI);
+    Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr,
+                              Constant::getNullValue(NPtr->getType()), 
+                              SCI->getName());
     SCI->replaceAllUsesWith(New);
     SCI->eraseFromParent();
     return;
@@ -1187,7 +1339,8 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser,
     // Load the pointer for this field.
     unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();
     Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo,
-                                     InsertedScalarizedValues, PHIsToRewrite);
+                                     InsertedScalarizedValues, PHIsToRewrite,
+                                     Context);
     
     // Create the new GEP idx vector.
     SmallVector<Value*, 8> GEPIdx;
@@ -1219,7 +1372,8 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser,
   // users.
   for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ) {
     Instruction *User = cast<Instruction>(*UI++);
-    RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
+    RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite,
+                            Context);
   }
 }
 
@@ -1229,11 +1383,13 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser,
 /// AllGlobalLoadUsesSimpleEnoughForHeapSRA.
 static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, 
                DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
-                   std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
+                   std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite,
+                   LLVMContext &Context) {
   for (Value::use_iterator UI = Load->use_begin(), E = Load->use_end();
        UI != E; ) {
     Instruction *User = cast<Instruction>(*UI++);
-    RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
+    RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite,
+                            Context);
   }
   
   if (Load->use_empty()) {
@@ -1244,8 +1400,9 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
 
 /// PerformHeapAllocSRoA - MI is an allocation of an array of structures.  Break
 /// it up into multiple allocations of arrays of the fields.
-static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){
-  DOUT << "SROA HEAP ALLOC: " << *GV << "  MALLOC = " << *MI;
+static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI,
+                                            LLVMContext &Context){
+  DEBUG(errs() << "SROA HEAP ALLOC: " << *GV << "  MALLOC = " << *MI);
   const StructType *STy = cast<StructType>(MI->getAllocatedType());
 
   // There is guaranteed to be at least one use of the malloc (storing
@@ -1264,14 +1421,15 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){
     const Type *PFieldTy = PointerType::getUnqual(FieldTy);
     
     GlobalVariable *NGV =
-      new GlobalVariable(PFieldTy, false, GlobalValue::InternalLinkage,
+      new GlobalVariable(*GV->getParent(),
+                         PFieldTy, false, GlobalValue::InternalLinkage,
                          Constant::getNullValue(PFieldTy),
-                         GV->getName() + ".f" + utostr(FieldNo), GV,
+                         GV->getName() + ".f" + Twine(FieldNo), GV,
                          GV->isThreadLocal());
     FieldGlobals.push_back(NGV);
     
     MallocInst *NMI = new MallocInst(FieldTy, MI->getArraySize(),
-                                     MI->getName() + ".f" + utostr(FieldNo),MI);
+                                     MI->getName() + ".f" + Twine(FieldNo), MI);
     FieldMallocs.push_back(NMI);
     new StoreInst(NMI, NGV, MI);
   }
@@ -1290,9 +1448,9 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){
   //    }
   Value *RunningOr = 0;
   for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) {
-    Value *Cond = new ICmpInst(ICmpInst::ICMP_EQ, FieldMallocs[i],
-                             Constant::getNullValue(FieldMallocs[i]->getType()),
-                                  "isnull", MI);
+    Value *Cond = new ICmpInst(MI, ICmpInst::ICMP_EQ, FieldMallocs[i],
+                              Constant::getNullValue(FieldMallocs[i]->getType()),
+                                  "isnull");
     if (!RunningOr)
       RunningOr = Cond;   // First seteq
     else
@@ -1305,7 +1463,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){
   
   // Create the block to check the first condition.  Put all these blocks at the
   // end of the function as they are unlikely to be executed.
-  BasicBlock *NullPtrBlock = BasicBlock::Create("malloc_ret_null",
+  BasicBlock *NullPtrBlock = BasicBlock::Create(Context, "malloc_ret_null",
                                                 OrigBB->getParent());
   
   // Remove the uncond branch from OrigBB to ContBB, turning it into a cond
@@ -1317,11 +1475,13 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){
   // pointer, because some may be null while others are not.
   for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
     Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock);
-    Value *Cmp = new ICmpInst(ICmpInst::ICMP_NE, GVVal, 
+    Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal, 
                               Constant::getNullValue(GVVal->getType()),
-                              "tmp", NullPtrBlock);
-    BasicBlock *FreeBlock = BasicBlock::Create("free_it", OrigBB->getParent());
-    BasicBlock *NextBlock = BasicBlock::Create("next", OrigBB->getParent());
+                              "tmp");
+    BasicBlock *FreeBlock = BasicBlock::Create(Context, "free_it", 
+                                               OrigBB->getParent());
+    BasicBlock *NextBlock = BasicBlock::Create(Context, "next", 
+                                               OrigBB->getParent());
     BranchInst::Create(FreeBlock, NextBlock, Cmp, NullPtrBlock);
 
     // Fill in FreeBlock.
@@ -1353,7 +1513,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){
     Instruction *User = cast<Instruction>(*UI++);
     
     if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
-      RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite);
+      RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite,
+                                   Context);
       continue;
     }
     
@@ -1384,7 +1545,192 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){
     for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
       Value *InVal = PN->getIncomingValue(i);
       InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues,
-                               PHIsToRewrite);
+                               PHIsToRewrite, Context);
+      FieldPN->addIncoming(InVal, PN->getIncomingBlock(i));
+    }
+  }
+  
+  // Drop all inter-phi links and any loads that made it this far.
+  for (DenseMap<Value*, std::vector<Value*> >::iterator
+       I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
+       I != E; ++I) {
+    if (PHINode *PN = dyn_cast<PHINode>(I->first))
+      PN->dropAllReferences();
+    else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
+      LI->dropAllReferences();
+  }
+  
+  // Delete all the phis and loads now that inter-references are dead.
+  for (DenseMap<Value*, std::vector<Value*> >::iterator
+       I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
+       I != E; ++I) {
+    if (PHINode *PN = dyn_cast<PHINode>(I->first))
+      PN->eraseFromParent();
+    else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
+      LI->eraseFromParent();
+  }
+  
+  // The old global is now dead, remove it.
+  GV->eraseFromParent();
+
+  ++NumHeapSRA;
+  return cast<GlobalVariable>(FieldGlobals[0]);
+}
+
+/// PerformHeapAllocSRoA - CI is an allocation of an array of structures.  Break
+/// it up into multiple allocations of arrays of the fields.
+static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV,
+                                            CallInst *CI, BitCastInst* BCI, 
+                                            LLVMContext &Context,
+                                            TargetData *TD){
+  DEBUG(errs() << "SROA HEAP ALLOC: " << *GV << "  MALLOC CALL = " << *CI 
+               << " BITCAST = " << *BCI << '\n');
+  const Type* MAT = getMallocAllocatedType(CI);
+  const StructType *STy = cast<StructType>(MAT);
+
+  // There is guaranteed to be at least one use of the malloc (storing
+  // it into GV).  If there are other uses, change them to be uses of
+  // the global to simplify later code.  This also deletes the store
+  // into GV.
+  ReplaceUsesOfMallocWithGlobal(BCI, GV);
+  
+  // Okay, at this point, there are no users of the malloc.  Insert N
+  // new mallocs at the same place as CI, and N globals.
+  std::vector<Value*> FieldGlobals;
+  std::vector<Value*> FieldMallocs;
+  
+  for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){
+    const Type *FieldTy = STy->getElementType(FieldNo);
+    const PointerType *PFieldTy = PointerType::getUnqual(FieldTy);
+    
+    GlobalVariable *NGV =
+      new GlobalVariable(*GV->getParent(),
+                         PFieldTy, false, GlobalValue::InternalLinkage,
+                         Constant::getNullValue(PFieldTy),
+                         GV->getName() + ".f" + Twine(FieldNo), GV,
+                         GV->isThreadLocal());
+    FieldGlobals.push_back(NGV);
+    
+    Value *NMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context), FieldTy,
+                                        getMallocArraySize(CI, Context, TD),
+                                        BCI->getName() + ".f" + Twine(FieldNo));
+    FieldMallocs.push_back(NMI);
+    new StoreInst(NMI, NGV, BCI);
+  }
+  
+  // The tricky aspect of this transformation is handling the case when malloc
+  // fails.  In the original code, malloc failing would set the result pointer
+  // of malloc to null.  In this case, some mallocs could succeed and others
+  // could fail.  As such, we emit code that looks like this:
+  //    F0 = malloc(field0)
+  //    F1 = malloc(field1)
+  //    F2 = malloc(field2)
+  //    if (F0 == 0 || F1 == 0 || F2 == 0) {
+  //      if (F0) { free(F0); F0 = 0; }
+  //      if (F1) { free(F1); F1 = 0; }
+  //      if (F2) { free(F2); F2 = 0; }
+  //    }
+  Value *RunningOr = 0;
+  for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) {
+    Value *Cond = new ICmpInst(BCI, ICmpInst::ICMP_EQ, FieldMallocs[i],
+                              Constant::getNullValue(FieldMallocs[i]->getType()),
+                                  "isnull");
+    if (!RunningOr)
+      RunningOr = Cond;   // First seteq
+    else
+      RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", BCI);
+  }
+
+  // Split the basic block at the old malloc.
+  BasicBlock *OrigBB = BCI->getParent();
+  BasicBlock *ContBB = OrigBB->splitBasicBlock(BCI, "malloc_cont");
+  
+  // Create the block to check the first condition.  Put all these blocks at the
+  // end of the function as they are unlikely to be executed.
+  BasicBlock *NullPtrBlock = BasicBlock::Create(Context, "malloc_ret_null",
+                                                OrigBB->getParent());
+  
+  // Remove the uncond branch from OrigBB to ContBB, turning it into a cond
+  // branch on RunningOr.
+  OrigBB->getTerminator()->eraseFromParent();
+  BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB);
+  
+  // Within the NullPtrBlock, we need to emit a comparison and branch for each
+  // pointer, because some may be null while others are not.
+  for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
+    Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock);
+    Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal, 
+                              Constant::getNullValue(GVVal->getType()),
+                              "tmp");
+    BasicBlock *FreeBlock = BasicBlock::Create(Context, "free_it",
+                                               OrigBB->getParent());
+    BasicBlock *NextBlock = BasicBlock::Create(Context, "next",
+                                               OrigBB->getParent());
+    BranchInst::Create(FreeBlock, NextBlock, Cmp, NullPtrBlock);
+
+    // Fill in FreeBlock.
+    new FreeInst(GVVal, FreeBlock);
+    new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i],
+                  FreeBlock);
+    BranchInst::Create(NextBlock, FreeBlock);
+    
+    NullPtrBlock = NextBlock;
+  }
+  
+  BranchInst::Create(ContBB, NullPtrBlock);
+  
+  // CI and BCI are no longer needed, remove them.
+  BCI->eraseFromParent();
+  CI->eraseFromParent();
+
+  /// InsertedScalarizedLoads - As we process loads, if we can't immediately
+  /// update all uses of the load, keep track of what scalarized loads are
+  /// inserted for a given load.
+  DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues;
+  InsertedScalarizedValues[GV] = FieldGlobals;
+  
+  std::vector<std::pair<PHINode*, unsigned> > PHIsToRewrite;
+  
+  // Okay, the malloc site is completely handled.  All of the uses of GV are now
+  // loads, and all uses of those loads are simple.  Rewrite them to use loads
+  // of the per-field globals instead.
+  for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;) {
+    Instruction *User = cast<Instruction>(*UI++);
+    
+    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+      RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite,
+                                   Context);
+      continue;
+    }
+    
+    // Must be a store of null.
+    StoreInst *SI = cast<StoreInst>(User);
+    assert(isa<ConstantPointerNull>(SI->getOperand(0)) &&
+           "Unexpected heap-sra user!");
+    
+    // Insert a store of null into each global.
+    for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
+      const PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType());
+      Constant *Null = Constant::getNullValue(PT->getElementType());
+      new StoreInst(Null, FieldGlobals[i], SI);
+    }
+    // Erase the original store.
+    SI->eraseFromParent();
+  }
+
+  // While we have PHIs that are interesting to rewrite, do it.
+  while (!PHIsToRewrite.empty()) {
+    PHINode *PN = PHIsToRewrite.back().first;
+    unsigned FieldNo = PHIsToRewrite.back().second;
+    PHIsToRewrite.pop_back();
+    PHINode *FieldPN = cast<PHINode>(InsertedScalarizedValues[PN][FieldNo]);
+    assert(FieldPN->getNumIncomingValues() == 0 &&"Already processed this phi");
+
+    // Add all the incoming values.  This can materialize more phis.
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+      Value *InVal = PN->getIncomingValue(i);
+      InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues,
+                               PHIsToRewrite, Context);
       FieldPN->addIncoming(InVal, PN->getIncomingBlock(i));
     }
   }
@@ -1422,7 +1768,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){
 static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
                                                MallocInst *MI,
                                                Module::global_iterator &GVI,
-                                               TargetData &TD) {
+                                               TargetData *TD,
+                                               LLVMContext &Context) {
   // If this is a malloc of an abstract type, don't touch it.
   if (!MI->getAllocatedType()->isSized())
     return false;
@@ -1456,9 +1803,10 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
     // Restrict this transformation to only working on small allocations
     // (2048 bytes currently), as we don't want to introduce a 16M global or
     // something.
-    if (NElements->getZExtValue()*
-        TD.getTypeAllocSize(MI->getAllocatedType()) < 2048) {
-      GVI = OptimizeGlobalAddressOfMalloc(GV, MI);
+    if (TD &&
+        NElements->getZExtValue()*
+        TD->getTypeAllocSize(MI->getAllocatedType()) < 2048) {
+      GVI = OptimizeGlobalAddressOfMalloc(GV, MI, Context);
       return true;
     }
   }
@@ -1485,7 +1833,8 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
       if (const ArrayType *AT = dyn_cast<ArrayType>(MI->getAllocatedType())) {
         MallocInst *NewMI = 
           new MallocInst(AllocSTy, 
-                         ConstantInt::get(Type::Int32Ty, AT->getNumElements()),
+                  ConstantInt::get(Type::getInt32Ty(Context),
+                  AT->getNumElements()),
                          "", MI);
         NewMI->takeName(MI);
         Value *Cast = new BitCastInst(NewMI, MI->getType(), "tmp", MI);
@@ -1494,7 +1843,100 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
         MI = NewMI;
       }
       
-      GVI = PerformHeapAllocSRoA(GV, MI);
+      GVI = PerformHeapAllocSRoA(GV, MI, Context);
+      return true;
+    }
+  }
+  
+  return false;
+}  
+
+/// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a
+/// pointer global variable with a single value stored it that is a malloc or
+/// cast of malloc.
+static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
+                                               CallInst *CI,
+                                               BitCastInst *BCI,
+                                               Module::global_iterator &GVI,
+                                               TargetData *TD,
+                                               LLVMContext &Context) {
+  // If we can't figure out the type being malloced, then we can't optimize.
+  const Type *AllocTy = getMallocAllocatedType(CI);
+  assert(AllocTy);
+
+  // If this is a malloc of an abstract type, don't touch it.
+  if (!AllocTy->isSized())
+    return false;
+
+  // We can't optimize this global unless all uses of it are *known* to be
+  // of the malloc value, not of the null initializer value (consider a use
+  // that compares the global's value against zero to see if the malloc has
+  // been reached).  To do this, we check to see if all uses of the global
+  // would trap if the global were null: this proves that they must all
+  // happen after the malloc.
+  if (!AllUsesOfLoadedValueWillTrapIfNull(GV))
+    return false;
+
+  // We can't optimize this if the malloc itself is used in a complex way,
+  // for example, being stored into multiple globals.  This allows the
+  // malloc to be stored into the specified global, loaded setcc'd, and
+  // GEP'd.  These are all things we could transform to using the global
+  // for.
+  {
+    SmallPtrSet<PHINode*, 8> PHIs;
+    if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs))
+      return false;
+  }  
+
+  // If we have a global that is only initialized with a fixed size malloc,
+  // transform the program to use global memory instead of malloc'd memory.
+  // This eliminates dynamic allocation, avoids an indirection accessing the
+  // data, and exposes the resultant global to further GlobalOpt.
+  if (ConstantInt *NElements =
+              dyn_cast<ConstantInt>(getMallocArraySize(CI, Context, TD))) {
+    // Restrict this transformation to only working on small allocations
+    // (2048 bytes currently), as we don't want to introduce a 16M global or
+    // something.
+    if (TD && 
+        NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) {
+      GVI = OptimizeGlobalAddressOfMalloc(GV, CI, BCI, Context, TD);
+      return true;
+    }
+  }
+  
+  // If the allocation is an array of structures, consider transforming this
+  // into multiple malloc'd arrays, one for each field.  This is basically
+  // SRoA for malloc'd memory.
+
+  // If this is an allocation of a fixed size array of structs, analyze as a
+  // variable size array.  malloc [100 x struct],1 -> malloc struct, 100
+  if (!isArrayMalloc(CI, Context, TD))
+    if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy))
+      AllocTy = AT->getElementType();
+  
+  if (const StructType *AllocSTy = dyn_cast<StructType>(AllocTy)) {
+    // This the structure has an unreasonable number of fields, leave it
+    // alone.
+    if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 &&
+        AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, BCI)) {
+
+      // If this is a fixed size array, transform the Malloc to be an alloc of
+      // structs.  malloc [100 x struct],1 -> malloc struct, 100
+      if (const ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI))) {
+        Value* NumElements = ConstantInt::get(Type::getInt32Ty(Context),
+                                              AT->getNumElements());
+        Value* NewMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context),
+                                              AllocSTy, NumElements,
+                                              BCI->getName());
+        Value *Cast = new BitCastInst(NewMI, getMallocType(CI), "tmp", CI);
+        BCI->replaceAllUsesWith(Cast);
+        BCI->eraseFromParent();
+        CI->eraseFromParent();
+        BCI = cast<BitCastInst>(NewMI);
+        CI = extractMallocCallFromBitCast(NewMI);
+      }
+      
+      GVI = PerformHeapAllocSRoA(GV, CI, BCI, Context, TD);
       return true;
     }
   }
@@ -1506,7 +1948,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
 // that only one value (besides its initializer) is ever stored to the global.
 static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
                                      Module::global_iterator &GVI,
-                                     TargetData &TD) {
+                                     TargetData *TD, LLVMContext &Context) {
   // Ignore no-op GEPs and bitcasts.
   StoredOnceVal = StoredOnceVal->stripPointerCasts();
 
@@ -1518,14 +1960,25 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
       GV->getInitializer()->isNullValue()) {
     if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) {
       if (GV->getInitializer()->getType() != SOVC->getType())
-        SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());
+        SOVC = 
+         ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());
 
       // Optimize away any trapping uses of the loaded value.
-      if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC))
+      if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, Context))
         return true;
     } else if (MallocInst *MI = dyn_cast<MallocInst>(StoredOnceVal)) {
-      if (TryToOptimizeStoreOfMallocToGlobal(GV, MI, GVI, TD))
+      if (TryToOptimizeStoreOfMallocToGlobal(GV, MI, GVI, TD, Context))
         return true;
+    } else if (CallInst *CI = extractMallocCall(StoredOnceVal)) {
+      if (getMallocAllocatedType(CI)) {
+        BitCastInst* BCI = NULL;
+        for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
+             UI != E; )
+          BCI = dyn_cast<BitCastInst>(cast<Instruction>(*UI++));
+        if (BCI &&
+            TryToOptimizeStoreOfMallocToGlobal(GV, CI, BCI, GVI, TD, Context))
+          return true;
+      }
     }
   }
 
@@ -1536,7 +1989,8 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
 /// two values ever stored into GV are its initializer and OtherVal.  See if we
 /// can shrink the global into a boolean and select between the two values
 /// whenever it is used.  This exposes the values to other scalar optimizations.
-static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
+static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal,
+                                       LLVMContext &Context) {
   const Type *GVElType = GV->getType()->getElementType();
   
   // If GVElType is already i1, it is already shrunk.  If the type of the GV is
@@ -1544,7 +1998,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
   // between them is very expensive and unlikely to lead to later
   // simplification.  In these cases, we typically end up with "cond ? v1 : v2"
   // where v1 and v2 both require constant pool loads, a big loss.
-  if (GVElType == Type::Int1Ty || GVElType->isFloatingPoint() ||
+  if (GVElType == Type::getInt1Ty(Context) || GVElType->isFloatingPoint() ||
       isa<PointerType>(GVElType) || isa<VectorType>(GVElType))
     return false;
   
@@ -1554,18 +2008,19 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
     if (!isa<LoadInst>(I) && !isa<StoreInst>(I))
       return false;
   
-  DOUT << "   *** SHRINKING TO BOOL: " << *GV;
+  DEBUG(errs() << "   *** SHRINKING TO BOOL: " << *GV);
   
   // Create the new global, initializing it to false.
-  GlobalVariable *NewGV = new GlobalVariable(Type::Int1Ty, false,
-         GlobalValue::InternalLinkage, ConstantInt::getFalse(),
+  GlobalVariable *NewGV = new GlobalVariable(Context,
+                                             Type::getInt1Ty(Context), false,
+         GlobalValue::InternalLinkage, ConstantInt::getFalse(Context),
                                              GV->getName()+".b",
-                                             (Module *)NULL,
                                              GV->isThreadLocal());
   GV->getParent()->getGlobalList().insert(GV, NewGV);
 
   Constant *InitVal = GV->getInitializer();
-  assert(InitVal->getType() != Type::Int1Ty && "No reason to shrink to bool!");
+  assert(InitVal->getType() != Type::getInt1Ty(Context) &&
+         "No reason to shrink to bool!");
 
   // If initialized to zero and storing one into the global, we can use a cast
   // instead of a select to synthesize the desired value.
@@ -1581,7 +2036,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
       // Only do this if we weren't storing a loaded value.
       Value *StoreVal;
       if (StoringOther || SI->getOperand(0) == InitVal)
-        StoreVal = ConstantInt::get(Type::Int1Ty, StoringOther);
+        StoreVal = ConstantInt::get(Type::getInt1Ty(Context), StoringOther);
       else {
         // Otherwise, we are storing a previously loaded copy.  To do this,
         // change the copy from copying the original value to just copying the
@@ -1632,7 +2087,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
   GV->removeDeadConstantUsers();
 
   if (GV->use_empty()) {
-    DOUT << "GLOBAL DEAD: " << *GV;
+    DEBUG(errs() << "GLOBAL DEAD: " << *GV);
     GV->eraseFromParent();
     ++NumDeleted;
     return true;
@@ -1675,7 +2130,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
         GS.AccessingFunction->getName() == "main" &&
         GS.AccessingFunction->hasExternalLinkage() &&
         GV->getType()->getAddressSpace() == 0) {
-      DOUT << "LOCALIZING GLOBAL: " << *GV;
+      DEBUG(errs() << "LOCALIZING GLOBAL: " << *GV);
       Instruction* FirstI = GS.AccessingFunction->getEntryBlock().begin();
       const Type* ElemTy = GV->getType()->getElementType();
       // FIXME: Pass Global's alignment when globals have alignment
@@ -1692,11 +2147,12 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
     // If the global is never loaded (but may be stored to), it is dead.
     // Delete it now.
     if (!GS.isLoaded) {
-      DOUT << "GLOBAL NEVER LOADED: " << *GV;
+      DEBUG(errs() << "GLOBAL NEVER LOADED: " << *GV);
 
       // Delete any stores we can find to the global.  We may not be able to
       // make it completely dead though.
-      bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer());
+      bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer(), 
+                                                GV->getContext());
 
       // If the global is dead now, delete it.
       if (GV->use_empty()) {
@@ -1707,16 +2163,16 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
       return Changed;
 
     } else if (GS.StoredType <= GlobalStatus::isInitializerStored) {
-      DOUT << "MARKING CONSTANT: " << *GV;
+      DEBUG(errs() << "MARKING CONSTANT: " << *GV);
       GV->setConstant(true);
 
       // Clean up any obviously simplifiable users now.
-      CleanupConstantGlobalUsers(GV, GV->getInitializer());
+      CleanupConstantGlobalUsers(GV, GV->getInitializer(), GV->getContext());
 
       // If the global is dead now, just nuke it.
       if (GV->use_empty()) {
-        DOUT << "   *** Marking constant allowed us to simplify "
-             << "all users and delete global!\n";
+        DEBUG(errs() << "   *** Marking constant allowed us to simplify "
+                     << "all users and delete global!\n");
         GV->eraseFromParent();
         ++NumDeleted;
       }
@@ -1724,11 +2180,12 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
       ++NumMarked;
       return true;
     } else if (!GV->getInitializer()->getType()->isSingleValueType()) {
-      if (GlobalVariable *FirstNewGV = SRAGlobal(GV, 
-                                                 getAnalysis<TargetData>())) {
-        GVI = FirstNewGV;  // Don't skip the newly produced globals!
-        return true;
-      }
+      if (TargetData *TD = getAnalysisIfAvailable<TargetData>())
+        if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD,
+                                                   GV->getContext())) {
+          GVI = FirstNewGV;  // Don't skip the newly produced globals!
+          return true;
+        }
     } else if (GS.StoredType == GlobalStatus::isStoredOnce) {
       // If the initial value for the global was an undef value, and if only
       // one other value was stored into it, we can just change the
@@ -1740,11 +2197,12 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
           GV->setInitializer(SOVConstant);
 
           // Clean up any obviously simplifiable users now.
-          CleanupConstantGlobalUsers(GV, GV->getInitializer());
+          CleanupConstantGlobalUsers(GV, GV->getInitializer(), 
+                                     GV->getContext());
 
           if (GV->use_empty()) {
-            DOUT << "   *** Substituting initializer allowed us to "
-                 << "simplify all users and delete global!\n";
+            DEBUG(errs() << "   *** Substituting initializer allowed us to "
+                         << "simplify all users and delete global!\n");
             GV->eraseFromParent();
             ++NumDeleted;
           } else {
@@ -1757,13 +2215,14 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
       // Try to optimize globals based on the knowledge that only one value
       // (besides its initializer) is ever stored to the global.
       if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GVI,
-                                   getAnalysis<TargetData>()))
+                                   getAnalysisIfAvailable<TargetData>(),
+                                   GV->getContext()))
         return true;
 
       // Otherwise, if the global was not a boolean, we can shrink it to be a
       // boolean.
       if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
-        if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
+        if (TryToShrinkGlobalToBoolean(GV, SOVConstant, GV->getContext())) {
           ++NumShrunkToBool;
           return true;
         }
@@ -1866,16 +2325,16 @@ GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) {
       if (!ATy) return 0;
       const StructType *STy = dyn_cast<StructType>(ATy->getElementType());
       if (!STy || STy->getNumElements() != 2 ||
-          STy->getElementType(0) != Type::Int32Ty) return 0;
+          STy->getElementType(0) != Type::getInt32Ty(M.getContext())) return 0;
       const PointerType *PFTy = dyn_cast<PointerType>(STy->getElementType(1));
       if (!PFTy) return 0;
       const FunctionType *FTy = dyn_cast<FunctionType>(PFTy->getElementType());
-      if (!FTy || FTy->getReturnType() != Type::VoidTy || FTy->isVarArg() ||
-          FTy->getNumParams() != 0)
+      if (!FTy || FTy->getReturnType() != Type::getVoidTy(M.getContext()) ||
+          FTy->isVarArg() || FTy->getNumParams() != 0)
         return 0;
       
       // Verify that the initializer is simple enough for us to handle.
-      if (!I->hasInitializer()) return 0;
+      if (!I->hasDefinitiveInitializer()) return 0;
       ConstantArray *CA = dyn_cast<ConstantArray>(I->getInitializer());
       if (!CA) return 0;
       for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i)
@@ -1916,10 +2375,11 @@ static std::vector<Function*> ParseGlobalCtors(GlobalVariable *GV) {
 /// InstallGlobalCtors - Given a specified llvm.global_ctors list, install the
 /// specified array, returning the new global to use.
 static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, 
-                                          const std::vector<Function*> &Ctors) {
+                                          const std::vector<Function*> &Ctors,
+                                          LLVMContext &Context) {
   // If we made a change, reassemble the initializer list.
   std::vector<Constant*> CSVals;
-  CSVals.push_back(ConstantInt::get(Type::Int32Ty, 65535));
+  CSVals.push_back(ConstantInt::get(Type::getInt32Ty(Context), 65535));
   CSVals.push_back(0);
   
   // Create the new init list.
@@ -1928,19 +2388,19 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
     if (Ctors[i]) {
       CSVals[1] = Ctors[i];
     } else {
-      const Type *FTy = FunctionType::get(Type::VoidTy, false);
+      const Type *FTy = FunctionType::get(Type::getVoidTy(Context), false);
       const PointerType *PFTy = PointerType::getUnqual(FTy);
       CSVals[1] = Constant::getNullValue(PFTy);
-      CSVals[0] = ConstantInt::get(Type::Int32Ty, 2147483647);
+      CSVals[0] = ConstantInt::get(Type::getInt32Ty(Context), 2147483647);
     }
-    CAList.push_back(ConstantStruct::get(CSVals));
+    CAList.push_back(ConstantStruct::get(Context, CSVals, false));
   }
   
   // Create the array initializer.
   const Type *StructTy =
-    cast<ArrayType>(GCL->getType()->getElementType())->getElementType();
-  Constant *CA = ConstantArray::get(ArrayType::get(StructTy, CAList.size()),
-                                    CAList);
+      cast<ArrayType>(GCL->getType()->getElementType())->getElementType();
+  Constant *CA = ConstantArray::get(ArrayType::get(StructTy, 
+                                                   CAList.size()), CAList);
   
   // If we didn't change the number of elements, don't create a new GV.
   if (CA->getType() == GCL->getInitializer()->getType()) {
@@ -1949,9 +2409,9 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
   }
   
   // Create the new global and insert it next to the existing list.
-  GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(),
+  GlobalVariable *NGV = new GlobalVariable(Context, CA->getType(), 
+                                           GCL->isConstant(),
                                            GCL->getLinkage(), CA, "",
-                                           (Module *)NULL,
                                            GCL->isThreadLocal());
   GCL->getParent()->getGlobalList().insert(GCL, NGV);
   NGV->takeName(GCL);
@@ -1984,21 +2444,38 @@ static Constant *getVal(DenseMap<Value*, Constant*> &ComputedValues,
 /// enough for us to understand.  In particular, if it is a cast of something,
 /// we punt.  We basically just support direct accesses to globals and GEP's of
 /// globals.  This should be kept up to date with CommitValueTo.
-static bool isSimpleEnoughPointerToCommit(Constant *C) {
-  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
-    if (!GV->hasExternalLinkage() && !GV->hasLocalLinkage())
-      return false;  // do not allow weak/linkonce/dllimport/dllexport linkage.
-    return !GV->isDeclaration();  // reject external globals.
-  }
+static bool isSimpleEnoughPointerToCommit(Constant *C, LLVMContext &Context) {
+  // Conservatively, avoid aggregate types. This is because we don't
+  // want to worry about them partially overlapping other stores.
+  if (!cast<PointerType>(C->getType())->getElementType()->isSingleValueType())
+    return false;
+
+  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
+    // Do not allow weak/linkonce/dllimport/dllexport linkage or
+    // external globals.
+    return GV->hasDefinitiveInitializer();
+
   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
     // Handle a constantexpr gep.
     if (CE->getOpcode() == Instruction::GetElementPtr &&
-        isa<GlobalVariable>(CE->getOperand(0))) {
+        isa<GlobalVariable>(CE->getOperand(0)) &&
+        cast<GEPOperator>(CE)->isInBounds()) {
       GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
-      if (!GV->hasExternalLinkage() && !GV->hasLocalLinkage())
-        return false;  // do not allow weak/linkonce/dllimport/dllexport linkage.
-      return GV->hasInitializer() &&
-             ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
+      // Do not allow weak/linkonce/dllimport/dllexport linkage or
+      // external globals.
+      if (!GV->hasDefinitiveInitializer())
+        return false;
+
+      // The first index must be zero.
+      ConstantInt *CI = dyn_cast<ConstantInt>(*next(CE->op_begin()));
+      if (!CI || !CI->isZero()) return false;
+
+      // The remaining indices must be compile-time known integers within the
+      // notional bounds of the corresponding static array types.
+      if (!CE->isGEPWithNoNotionalOverIndexing())
+        return false;
+
+      return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
     }
   return false;
 }
@@ -2007,7 +2484,8 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) {
 /// initializer.  This returns 'Init' modified to reflect 'Val' stored into it.
 /// At this point, the GEP operands of Addr [0, OpNo) have been stepped into.
 static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
-                                   ConstantExpr *Addr, unsigned OpNo) {
+                                   ConstantExpr *Addr, unsigned OpNo,
+                                   LLVMContext &Context) {
   // Base case of the recursion.
   if (OpNo == Addr->getNumOperands()) {
     assert(Val->getType() == Init->getType() && "Type mismatch!");
@@ -2028,7 +2506,7 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
       for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
         Elts.push_back(UndefValue::get(STy->getElementType(i)));
     } else {
-      assert(0 && "This code is out of sync with "
+      llvm_unreachable("This code is out of sync with "
              " ConstantFoldLoadThroughGEPConstantExpr");
     }
     
@@ -2036,10 +2514,10 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
     ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo));
     unsigned Idx = CU->getZExtValue();
     assert(Idx < STy->getNumElements() && "Struct index out of range!");
-    Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1);
+    Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1, Context);
     
     // Return the modified struct.
-    return ConstantStruct::get(&Elts[0], Elts.size(), STy->isPacked());
+    return ConstantStruct::get(Context, &Elts[0], Elts.size(), STy->isPacked());
   } else {
     ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo));
     const ArrayType *ATy = cast<ArrayType>(Init->getType());
@@ -2056,20 +2534,21 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
       Constant *Elt = UndefValue::get(ATy->getElementType());
       Elts.assign(ATy->getNumElements(), Elt);
     } else {
-      assert(0 && "This code is out of sync with "
+      llvm_unreachable("This code is out of sync with "
              " ConstantFoldLoadThroughGEPConstantExpr");
     }
     
     assert(CI->getZExtValue() < ATy->getNumElements());
     Elts[CI->getZExtValue()] =
-      EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1);
+      EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1, Context);
     return ConstantArray::get(ATy, Elts);
   }    
 }
 
 /// CommitValueTo - We have decided that Addr (which satisfies the predicate
 /// isSimpleEnoughPointerToCommit) should get Val as its value.  Make it happen.
-static void CommitValueTo(Constant *Val, Constant *Addr) {
+static void CommitValueTo(Constant *Val, Constant *Addr,
+                          LLVMContext &Context) {
   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
     assert(GV->hasInitializer());
     GV->setInitializer(Val);
@@ -2080,7 +2559,7 @@ static void CommitValueTo(Constant *Val, Constant *Addr) {
   GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
   
   Constant *Init = GV->getInitializer();
-  Init = EvaluateStoreInto(Init, Val, CE, 2);
+  Init = EvaluateStoreInto(Init, Val, CE, 2, Context);
   GV->setInitializer(Init);
 }
 
@@ -2088,7 +2567,8 @@ static void CommitValueTo(Constant *Val, Constant *Addr) {
 /// P after the stores reflected by 'memory' have been performed.  If we can't
 /// decide, return null.
 static Constant *ComputeLoadResult(Constant *P,
-                                const DenseMap<Constant*, Constant*> &Memory) {
+                                const DenseMap<Constant*, Constant*> &Memory,
+                                LLVMContext &Context) {
   // If this memory location has been recently stored, use the stored value: it
   // is the most up-to-date.
   DenseMap<Constant*, Constant*>::const_iterator I = Memory.find(P);
@@ -2096,7 +2576,7 @@ static Constant *ComputeLoadResult(Constant *P,
  
   // Access it.
   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
-    if (GV->hasInitializer())
+    if (GV->hasDefinitiveInitializer())
       return GV->getInitializer();
     return 0;
   }
@@ -2106,7 +2586,7 @@ static Constant *ComputeLoadResult(Constant *P,
     if (CE->getOpcode() == Instruction::GetElementPtr &&
         isa<GlobalVariable>(CE->getOperand(0))) {
       GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
-      if (GV->hasInitializer())
+      if (GV->hasDefinitiveInitializer())
         return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
     }
 
@@ -2117,7 +2597,7 @@ static Constant *ComputeLoadResult(Constant *P,
 /// successful, false if we can't evaluate it.  ActualArgs contains the formal
 /// arguments for the function.
 static bool EvaluateFunction(Function *F, Constant *&RetVal,
-                             const std::vector<Constant*> &ActualArgs,
+                             const SmallVectorImpl<Constant*> &ActualArgs,
                              std::vector<Function*> &CallStack,
                              DenseMap<Constant*, Constant*> &MutatedMemory,
                              std::vector<GlobalVariable*> &AllocaTmps) {
@@ -2126,6 +2606,8 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
   if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end())
     return false;
   
+  LLVMContext &Context = F->getContext();
+  
   CallStack.push_back(F);
   
   /// Values - As we compute SSA register values, we store their contents here.
@@ -2152,7 +2634,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
     if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) {
       if (SI->isVolatile()) return false;  // no volatile accesses.
       Constant *Ptr = getVal(Values, SI->getOperand(1));
-      if (!isSimpleEnoughPointerToCommit(Ptr))
+      if (!isSimpleEnoughPointerToCommit(Ptr, Context))
         // If this is too complex for us to commit, reject it.
         return false;
       Constant *Val = getVal(Values, SI->getOperand(0));
@@ -2170,7 +2652,8 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
                                          getVal(Values, CI->getOperand(0)),
                                          CI->getType());
     } else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) {
-      InstResult = ConstantExpr::getSelect(getVal(Values, SI->getOperand(0)),
+      InstResult =
+            ConstantExpr::getSelect(getVal(Values, SI->getOperand(0)),
                                            getVal(Values, SI->getOperand(1)),
                                            getVal(Values, SI->getOperand(2)));
     } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) {
@@ -2179,16 +2662,18 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
       for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end();
            i != e; ++i)
         GEPOps.push_back(getVal(Values, *i));
-      InstResult = ConstantExpr::getGetElementPtr(P, &GEPOps[0], GEPOps.size());
+      InstResult = cast<GEPOperator>(GEP)->isInBounds() ?
+          ConstantExpr::getInBoundsGetElementPtr(P, &GEPOps[0], GEPOps.size()) :
+          ConstantExpr::getGetElementPtr(P, &GEPOps[0], GEPOps.size());
     } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) {
       if (LI->isVolatile()) return false;  // no volatile accesses.
       InstResult = ComputeLoadResult(getVal(Values, LI->getOperand(0)),
-                                     MutatedMemory);
+                                     MutatedMemory, Context);
       if (InstResult == 0) return false; // Could not evaluate load.
     } else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) {
       if (AI->isArrayAllocation()) return false;  // Cannot handle array allocs.
       const Type *Ty = AI->getType()->getElementType();
-      AllocaTmps.push_back(new GlobalVariable(Ty, false,
+      AllocaTmps.push_back(new GlobalVariable(Context, Ty, false,
                                               GlobalValue::InternalLinkage,
                                               UndefValue::get(Ty),
                                               AI->getName()));
@@ -2208,14 +2693,14 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
       Function *Callee = dyn_cast<Function>(getVal(Values, CI->getOperand(0)));
       if (!Callee) return false;  // Cannot resolve.
 
-      std::vector<Constant*> Formals;
+      SmallVector<Constant*, 8> Formals;
       for (User::op_iterator i = CI->op_begin() + 1, e = CI->op_end();
            i != e; ++i)
         Formals.push_back(getVal(Values, *i));
-      
+
       if (Callee->isDeclaration()) {
         // If this is a function we can constant fold, do it.
-        if (Constant *C = ConstantFoldCall(Callee, &Formals[0],
+        if (Constant *C = ConstantFoldCall(Callee, Formals.data(),
                                            Formals.size())) {
           InstResult = C;
         } else {
@@ -2310,16 +2795,17 @@ static bool EvaluateStaticConstructor(Function *F) {
 
   // Call the function.
   Constant *RetValDummy;
-  bool EvalSuccess = EvaluateFunction(F, RetValDummy, std::vector<Constant*>(),
-                                       CallStack, MutatedMemory, AllocaTmps);
+  bool EvalSuccess = EvaluateFunction(F, RetValDummy,
+                                      SmallVector<Constant*, 0>(), CallStack,
+                                      MutatedMemory, AllocaTmps);
   if (EvalSuccess) {
     // We succeeded at evaluation: commit the result.
-    DOUT << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
-         << F->getName() << "' to " << MutatedMemory.size()
-         << " stores.\n";
+    DEBUG(errs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
+          << F->getName() << "' to " << MutatedMemory.size()
+          << " stores.\n");
     for (DenseMap<Constant*, Constant*>::iterator I = MutatedMemory.begin(),
          E = MutatedMemory.end(); I != E; ++I)
-      CommitValueTo(I->second, I->first);
+      CommitValueTo(I->second, I->first, F->getContext());
   }
   
   // At this point, we are done interpreting.  If we created any 'alloca'
@@ -2376,7 +2862,7 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
   
   if (!MadeChange) return false;
   
-  GCL = InstallGlobalCtors(GCL, Ctors);
+  GCL = InstallGlobalCtors(GCL, Ctors, GCL->getContext());
   return true;
 }
 
diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp
index e4a9dea..7b0e9c7 100644
--- a/lib/Transforms/IPO/IPConstantPropagation.cpp
+++ b/lib/Transforms/IPO/IPConstantPropagation.cpp
@@ -19,6 +19,7 @@
 #include "llvm/Transforms/IPO.h"
 #include "llvm/Constants.h"
 #include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -129,7 +130,8 @@ bool IPCP::PropagateConstantsIntoArguments(Function &F) {
   Function::arg_iterator AI = F.arg_begin();
   for (unsigned i = 0, e = ArgumentConstants.size(); i != e; ++i, ++AI) {
     // Do we have a constant argument?
-    if (ArgumentConstants[i].second || AI->use_empty())
+    if (ArgumentConstants[i].second || AI->use_empty() ||
+        (AI->hasByValAttr() && !F.onlyReadsMemory()))
       continue;
   
     Value *V = ArgumentConstants[i].first;
@@ -151,13 +153,15 @@ bool IPCP::PropagateConstantsIntoArguments(Function &F) {
 // callers will be updated to use the value they pass in directly instead of
 // using the return value.
 bool IPCP::PropagateConstantReturn(Function &F) {
-  if (F.getReturnType() == Type::VoidTy)
+  if (F.getReturnType() == Type::getVoidTy(F.getContext()))
     return false; // No return value.
 
   // If this function could be overridden later in the link stage, we can't
   // propagate information about its results into callers.
   if (F.mayBeOverridden())
     return false;
+    
+  LLVMContext &Context = F.getContext();
   
   // Check to see if this function returns a constant.
   SmallVector<Value *,4> RetVals;
@@ -182,7 +186,7 @@ bool IPCP::PropagateConstantReturn(Function &F) {
         if (!STy)
           V = RI->getOperand(i);
         else
-          V = FindInsertedValue(RI->getOperand(0), i);
+          V = FindInsertedValue(RI->getOperand(0), i, Context);
 
         if (V) {
           // Ignore undefs, we can change them into anything
diff --git a/lib/Transforms/IPO/IndMemRemoval.cpp b/lib/Transforms/IPO/IndMemRemoval.cpp
index b55dea2..e7884ec 100644
--- a/lib/Transforms/IPO/IndMemRemoval.cpp
+++ b/lib/Transforms/IPO/IndMemRemoval.cpp
@@ -1,4 +1,4 @@
-//===-- IndMemRemoval.cpp - Remove indirect allocations and frees ----------===//
+//===-- IndMemRemoval.cpp - Remove indirect allocations and frees ---------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -10,8 +10,8 @@
 // This pass finds places where memory allocation functions may escape into
 // indirect land.  Some transforms are much easier (aka possible) only if free 
 // or malloc are not called indirectly.
-// Thus find places where the address of memory functions are taken and construct
-// bounce functions with direct calls of those functions.
+// Thus find places where the address of memory functions are taken and 
+// construct bounce functions with direct calls of those functions.
 //
 //===----------------------------------------------------------------------===//
 
@@ -55,8 +55,8 @@ bool IndMemRemPass::runOnModule(Module &M) {
       Function* FN = Function::Create(F->getFunctionType(),
                                       GlobalValue::LinkOnceAnyLinkage,
                                       "free_llvm_bounce", &M);
-      BasicBlock* bb = BasicBlock::Create("entry",FN);
-      Instruction* R = ReturnInst::Create(bb);
+      BasicBlock* bb = BasicBlock::Create(M.getContext(), "entry",FN);
+      Instruction* R = ReturnInst::Create(M.getContext(), bb);
       new FreeInst(FN->arg_begin(), R);
       ++NumBounce;
       NumBounceSites += F->getNumUses();
@@ -70,11 +70,12 @@ bool IndMemRemPass::runOnModule(Module &M) {
                                       GlobalValue::LinkOnceAnyLinkage,
                                       "malloc_llvm_bounce", &M);
       FN->setDoesNotAlias(0);
-      BasicBlock* bb = BasicBlock::Create("entry",FN);
+      BasicBlock* bb = BasicBlock::Create(M.getContext(), "entry",FN);
       Instruction* c = CastInst::CreateIntegerCast(
-          FN->arg_begin(), Type::Int32Ty, false, "c", bb);
-      Instruction* a = new MallocInst(Type::Int8Ty, c, "m", bb);
-      ReturnInst::Create(a, bb);
+          FN->arg_begin(), Type::getInt32Ty(M.getContext()), false, "c", bb);
+      Instruction* a = new MallocInst(Type::getInt8Ty(M.getContext()),
+                                      c, "m", bb);
+      ReturnInst::Create(M.getContext(), a, bb);
       ++NumBounce;
       NumBounceSites += F->getNumUses();
       F->replaceAllUsesWith(FN);
diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp
index 5f9ea54..2344403 100644
--- a/lib/Transforms/IPO/InlineAlways.cpp
+++ b/lib/Transforms/IPO/InlineAlways.cpp
@@ -19,11 +19,11 @@
 #include "llvm/Module.h"
 #include "llvm/Type.h"
 #include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/InlineCost.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/IPO/InlinerPass.h"
-#include "llvm/Transforms/Utils/InlineCost.h"
 #include "llvm/ADT/SmallPtrSet.h"
 
 using namespace llvm;
diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp
index e107a00..b1c643b 100644
--- a/lib/Transforms/IPO/InlineSimple.cpp
+++ b/lib/Transforms/IPO/InlineSimple.cpp
@@ -18,11 +18,11 @@
 #include "llvm/Module.h"
 #include "llvm/Type.h"
 #include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/InlineCost.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/IPO/InlinerPass.h"
-#include "llvm/Transforms/Utils/InlineCost.h"
 #include "llvm/ADT/SmallPtrSet.h"
 
 using namespace llvm;
@@ -78,7 +78,7 @@ bool SimpleInliner::doInitialization(CallGraph &CG) {
     return false;
 
   // Don't crash on invalid code
-  if (!GV->hasInitializer())
+  if (!GV->hasDefinitiveInitializer())
     return false;
   
   const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index b382837..ea47366 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -18,21 +18,25 @@
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/InlineCost.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Transforms/IPO/InlinerPass.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
 #include <set>
 using namespace llvm;
 
 STATISTIC(NumInlined, "Number of functions inlined");
 STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
+STATISTIC(NumMergedAllocas, "Number of allocas merged together");
 
 static cl::opt<int>
-InlineLimit("inline-threshold", cl::Hidden, cl::init(200),
+InlineLimit("inline-threshold", cl::Hidden, cl::init(200), cl::ZeroOrMore,
         cl::desc("Control the amount of inlining to perform (default = 200)"));
 
 Inliner::Inliner(void *ID) 
@@ -45,19 +49,32 @@ Inliner::Inliner(void *ID, int Threshold)
 /// the call graph.  If the derived class implements this method, it should
 /// always explicitly call the implementation here.
 void Inliner::getAnalysisUsage(AnalysisUsage &Info) const {
-  Info.addRequired<TargetData>();
   CallGraphSCCPass::getAnalysisUsage(Info);
 }
 
-// InlineCallIfPossible - If it is possible to inline the specified call site,
-// do so and update the CallGraph for this operation.
-bool Inliner::InlineCallIfPossible(CallSite CS, CallGraph &CG,
-                                 const SmallPtrSet<Function*, 8> &SCCFunctions,
-                                 const TargetData &TD) {
+
+typedef DenseMap<const ArrayType*, std::vector<AllocaInst*> >
+InlinedArrayAllocasTy;
+
+/// InlineCallIfPossible - If it is possible to inline the specified call site,
+/// do so and update the CallGraph for this operation.
+///
+/// This function also does some basic book-keeping to update the IR.  The
+/// InlinedArrayAllocas map keeps track of any allocas that are already
+/// available from other  functions inlined into the caller.  If we are able to
+/// inline this call site we attempt to reuse already available allocas or add
+/// any new allocas to the set if not possible.
+static bool InlineCallIfPossible(CallSite CS, CallGraph &CG,
+                                 const TargetData *TD,
+                                 InlinedArrayAllocasTy &InlinedArrayAllocas) {
   Function *Callee = CS.getCalledFunction();
   Function *Caller = CS.getCaller();
 
-  if (!InlineFunction(CS, &CG, &TD)) return false;
+  // Try to inline the function.  Get the list of static allocas that were
+  // inlined.
+  SmallVector<AllocaInst*, 16> StaticAllocas;
+  if (!InlineFunction(CS, &CG, TD, &StaticAllocas))
+    return false;
 
   // If the inlined function had a higher stack protection level than the
   // calling function, then bump up the caller's stack protection level.
@@ -67,23 +84,89 @@ bool Inliner::InlineCallIfPossible(CallSite CS, CallGraph &CG,
            !Caller->hasFnAttr(Attribute::StackProtectReq))
     Caller->addFnAttr(Attribute::StackProtect);
 
-  // If we inlined the last possible call site to the function, delete the
-  // function body now.
-  if (Callee->use_empty() && (Callee->hasLocalLinkage() ||
-                              Callee->hasAvailableExternallyLinkage()) &&
-      !SCCFunctions.count(Callee)) {
-    DOUT << "    -> Deleting dead function: " << Callee->getName() << "\n";
-    CallGraphNode *CalleeNode = CG[Callee];
-
-    // Remove any call graph edges from the callee to its callees.
-    CalleeNode->removeAllCalledFunctions();
-
-    resetCachedCostInfo(CalleeNode->getFunction());
+  
+  // Look at all of the allocas that we inlined through this call site.  If we
+  // have already inlined other allocas through other calls into this function,
+  // then we know that they have disjoint lifetimes and that we can merge them.
+  //
+  // There are many heuristics possible for merging these allocas, and the
+  // different options have different tradeoffs.  One thing that we *really*
+  // don't want to hurt is SRoA: once inlining happens, often allocas are no
+  // longer address taken and so they can be promoted.
+  //
+  // Our "solution" for that is to only merge allocas whose outermost type is an
+  // array type.  These are usually not promoted because someone is using a
+  // variable index into them.  These are also often the most important ones to
+  // merge.
+  //
+  // A better solution would be to have real memory lifetime markers in the IR
+  // and not have the inliner do any merging of allocas at all.  This would
+  // allow the backend to do proper stack slot coloring of all allocas that
+  // *actually make it to the backend*, which is really what we want.
+  //
+  // Because we don't have this information, we do this simple and useful hack.
+  //
+  SmallPtrSet<AllocaInst*, 16> UsedAllocas;
+  
+  // Loop over all the allocas we have so far and see if they can be merged with
+  // a previously inlined alloca.  If not, remember that we had it.
+  for (unsigned AllocaNo = 0, e = StaticAllocas.size();
+       AllocaNo != e; ++AllocaNo) {
+    AllocaInst *AI = StaticAllocas[AllocaNo];
+    
+    // Don't bother trying to merge array allocations (they will usually be
+    // canonicalized to be an allocation *of* an array), or allocations whose
+    // type is not itself an array (because we're afraid of pessimizing SRoA).
+    const ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType());
+    if (ATy == 0 || AI->isArrayAllocation())
+      continue;
+    
+    // Get the list of all available allocas for this array type.
+    std::vector<AllocaInst*> &AllocasForType = InlinedArrayAllocas[ATy];
+    
+    // Loop over the allocas in AllocasForType to see if we can reuse one.  Note
+    // that we have to be careful not to reuse the same "available" alloca for
+    // multiple different allocas that we just inlined, we use the 'UsedAllocas'
+    // set to keep track of which "available" allocas are being used by this
+    // function.  Also, AllocasForType can be empty of course!
+    bool MergedAwayAlloca = false;
+    for (unsigned i = 0, e = AllocasForType.size(); i != e; ++i) {
+      AllocaInst *AvailableAlloca = AllocasForType[i];
+      
+      // The available alloca has to be in the right function, not in some other
+      // function in this SCC.
+      if (AvailableAlloca->getParent() != AI->getParent())
+        continue;
+      
+      // If the inlined function already uses this alloca then we can't reuse
+      // it.
+      if (!UsedAllocas.insert(AvailableAlloca))
+        continue;
+      
+      // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare
+      // success!
+      DEBUG(errs() << "    ***MERGED ALLOCA: " << *AI);
+      
+      AI->replaceAllUsesWith(AvailableAlloca);
+      AI->eraseFromParent();
+      MergedAwayAlloca = true;
+      ++NumMergedAllocas;
+      break;
+    }
 
-    // Removing the node for callee from the call graph and delete it.
-    delete CG.removeFunctionFromModule(CalleeNode);
-    ++NumDeleted;
+    // If we already nuked the alloca, we're done with it.
+    if (MergedAwayAlloca)
+      continue;
+
+    // If we were unable to merge away the alloca either because there are no
+    // allocas of the right type available or because we reused them all
+    // already, remember that this alloca came from an inlined function and mark
+    // it used so we don't reuse it for other allocas from this inline
+    // operation.
+    AllocasForType.push_back(AI);
+    UsedAllocas.insert(AI);
   }
+  
   return true;
 }
         
@@ -91,69 +174,145 @@ bool Inliner::InlineCallIfPossible(CallSite CS, CallGraph &CG,
 /// at the given CallSite.
 bool Inliner::shouldInline(CallSite CS) {
   InlineCost IC = getInlineCost(CS);
-  float FudgeFactor = getInlineFudgeFactor(CS);
   
   if (IC.isAlways()) {
-    DOUT << "    Inlining: cost=always"
-         << ", Call: " << *CS.getInstruction();
+    DEBUG(errs() << "    Inlining: cost=always"
+          << ", Call: " << *CS.getInstruction() << "\n");
     return true;
   }
   
   if (IC.isNever()) {
-    DOUT << "    NOT Inlining: cost=never"
-         << ", Call: " << *CS.getInstruction();
+    DEBUG(errs() << "    NOT Inlining: cost=never"
+          << ", Call: " << *CS.getInstruction() << "\n");
     return false;
   }
   
   int Cost = IC.getValue();
   int CurrentThreshold = InlineThreshold;
-  Function *Fn = CS.getCaller();
-  if (Fn && !Fn->isDeclaration() 
-      && Fn->hasFnAttr(Attribute::OptimizeForSize)
-      && InlineThreshold != 50) {
+  Function *Caller = CS.getCaller();
+  if (Caller && !Caller->isDeclaration() &&
+      Caller->hasFnAttr(Attribute::OptimizeForSize) &&
+      InlineLimit.getNumOccurrences() == 0 &&
+      InlineThreshold != 50)
     CurrentThreshold = 50;
-  }
   
+  float FudgeFactor = getInlineFudgeFactor(CS);
   if (Cost >= (int)(CurrentThreshold * FudgeFactor)) {
-    DOUT << "    NOT Inlining: cost=" << Cost
-         << ", Call: " << *CS.getInstruction();
+    DEBUG(errs() << "    NOT Inlining: cost=" << Cost
+          << ", Call: " << *CS.getInstruction() << "\n");
     return false;
-  } else {
-    DOUT << "    Inlining: cost=" << Cost
-         << ", Call: " << *CS.getInstruction();
-    return true;
   }
+  
+  // Try to detect the case where the current inlining candidate caller
+  // (call it B) is a static function and is an inlining candidate elsewhere,
+  // and the current candidate callee (call it C) is large enough that
+  // inlining it into B would make B too big to inline later.  In these
+  // circumstances it may be best not to inline C into B, but to inline B
+  // into its callers.
+  if (Caller->hasLocalLinkage()) {
+    int TotalSecondaryCost = 0;
+    bool outerCallsFound = false;
+    bool allOuterCallsWillBeInlined = true;
+    bool someOuterCallWouldNotBeInlined = false;
+    for (Value::use_iterator I = Caller->use_begin(), E =Caller->use_end(); 
+         I != E; ++I) {
+      CallSite CS2 = CallSite::get(*I);
+
+      // If this isn't a call to Caller (it could be some other sort
+      // of reference) skip it.
+      if (CS2.getInstruction() == 0 || CS2.getCalledFunction() != Caller)
+        continue;
+
+      InlineCost IC2 = getInlineCost(CS2);
+      if (IC2.isNever())
+        allOuterCallsWillBeInlined = false;
+      if (IC2.isAlways() || IC2.isNever())
+        continue;
+
+      outerCallsFound = true;
+      int Cost2 = IC2.getValue();
+      int CurrentThreshold2 = InlineThreshold;
+      Function *Caller2 = CS2.getCaller();
+      if (Caller2 && !Caller2->isDeclaration() &&
+          Caller2->hasFnAttr(Attribute::OptimizeForSize) &&
+          InlineThreshold != 50)
+        CurrentThreshold2 = 50;
+
+      float FudgeFactor2 = getInlineFudgeFactor(CS2);
+
+      if (Cost2 >= (int)(CurrentThreshold2 * FudgeFactor2))
+        allOuterCallsWillBeInlined = false;
+
+      // See if we have this case.  We subtract off the penalty
+      // for the call instruction, which we would be deleting.
+      if (Cost2 < (int)(CurrentThreshold2 * FudgeFactor2) &&
+          Cost2 + Cost - (InlineConstants::CallPenalty + 1) >= 
+                (int)(CurrentThreshold2 * FudgeFactor2)) {
+        someOuterCallWouldNotBeInlined = true;
+        TotalSecondaryCost += Cost2;
+      }
+    }
+    // If all outer calls to Caller would get inlined, the cost for the last
+    // one is set very low by getInlineCost, in anticipation that Caller will
+    // be removed entirely.  We did not account for this above unless there
+    // is only one caller of Caller.
+    if (allOuterCallsWillBeInlined && Caller->use_begin() != Caller->use_end())
+      TotalSecondaryCost += InlineConstants::LastCallToStaticBonus;
+
+    if (outerCallsFound && someOuterCallWouldNotBeInlined && 
+        TotalSecondaryCost < Cost) {
+      DEBUG(errs() << "    NOT Inlining: " << *CS.getInstruction() << 
+           " Cost = " << Cost << 
+           ", outer Cost = " << TotalSecondaryCost << '\n');
+      return false;
+    }
+  }
+
+  DEBUG(errs() << "    Inlining: cost=" << Cost
+        << ", Call: " << *CS.getInstruction() << '\n');
+  return true;
 }
 
-bool Inliner::runOnSCC(const std::vector<CallGraphNode*> &SCC) {
+bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) {
   CallGraph &CG = getAnalysis<CallGraph>();
-  TargetData &TD = getAnalysis<TargetData>();
+  const TargetData *TD = getAnalysisIfAvailable<TargetData>();
 
   SmallPtrSet<Function*, 8> SCCFunctions;
-  DOUT << "Inliner visiting SCC:";
+  DEBUG(errs() << "Inliner visiting SCC:");
   for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
     Function *F = SCC[i]->getFunction();
     if (F) SCCFunctions.insert(F);
-    DOUT << " " << (F ? F->getName() : "INDIRECTNODE");
+    DEBUG(errs() << " " << (F ? F->getName() : "INDIRECTNODE"));
   }
 
   // Scan through and identify all call sites ahead of time so that we only
   // inline call sites in the original functions, not call sites that result
   // from inlining other functions.
-  std::vector<CallSite> CallSites;
+  SmallVector<CallSite, 16> CallSites;
 
-  for (unsigned i = 0, e = SCC.size(); i != e; ++i)
-    if (Function *F = SCC[i]->getFunction())
-      for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
-        for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
-          CallSite CS = CallSite::get(I);
-          if (CS.getInstruction() && !isa<DbgInfoIntrinsic>(I) &&
-                                     (!CS.getCalledFunction() ||
-                                      !CS.getCalledFunction()->isDeclaration()))
-            CallSites.push_back(CS);
-        }
+  for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
+    Function *F = SCC[i]->getFunction();
+    if (!F) continue;
+    
+    for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+      for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+        CallSite CS = CallSite::get(I);
+        // If this isn't a call, or it is a call to an intrinsic, it can
+        // never be inlined.
+        if (CS.getInstruction() == 0 || isa<IntrinsicInst>(I))
+          continue;
+        
+        // If this is a direct call to an external function, we can never inline
+        // it.  If it is an indirect call, inlining may resolve it to be a
+        // direct call, so we keep it.
+        if (CS.getCalledFunction() && CS.getCalledFunction()->isDeclaration())
+          continue;
+        
+        CallSites.push_back(CS);
+      }
+  }
 
-  DOUT << ": " << CallSites.size() << " call sites.\n";
+  DEBUG(errs() << ": " << CallSites.size() << " call sites.\n");
 
   // Now that we have all of the call sites, move the ones to functions in the
   // current SCC to the end of the list.
@@ -163,6 +322,9 @@ bool Inliner::runOnSCC(const std::vector<CallGraphNode*> &SCC) {
       if (SCCFunctions.count(F))
         std::swap(CallSites[i--], CallSites[--FirstCallInSCC]);
 
+  
+  InlinedArrayAllocasTy InlinedArrayAllocas;
+  
   // Now that we have all of the call sites, loop over them and inline them if
   // it looks profitable to do so.
   bool Changed = false;
@@ -171,51 +333,68 @@ bool Inliner::runOnSCC(const std::vector<CallGraphNode*> &SCC) {
     LocalChange = false;
     // Iterate over the outer loop because inlining functions can cause indirect
     // calls to become direct calls.
-    for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi)
-      if (Function *Callee = CallSites[CSi].getCalledFunction()) {
-        // Calls to external functions are never inlinable.
-        if (Callee->isDeclaration()) {
-          if (SCC.size() == 1) {
-            std::swap(CallSites[CSi], CallSites.back());
-            CallSites.pop_back();
-          } else {
-            // Keep the 'in SCC / not in SCC' boundary correct.
-            CallSites.erase(CallSites.begin()+CSi);
-          }
-          --CSi;
-          continue;
-        }
-
-        // If the policy determines that we should inline this function,
-        // try to do so.
-        CallSite CS = CallSites[CSi];
-        if (shouldInline(CS)) {
-          Function *Caller = CS.getCaller();
-          // Attempt to inline the function...
-          if (InlineCallIfPossible(CS, CG, SCCFunctions, TD)) {
-            // Remove any cached cost info for this caller, as inlining the
-            // callee has increased the size of the caller (which may be the
-            // same as the callee).
-            resetCachedCostInfo(Caller);
-
-            // Remove this call site from the list.  If possible, use 
-            // swap/pop_back for efficiency, but do not use it if doing so would
-            // move a call site to a function in this SCC before the
-            // 'FirstCallInSCC' barrier.
-            if (SCC.size() == 1) {
-              std::swap(CallSites[CSi], CallSites.back());
-              CallSites.pop_back();
-            } else {
-              CallSites.erase(CallSites.begin()+CSi);
-            }
-            --CSi;
-
-            ++NumInlined;
-            Changed = true;
-            LocalChange = true;
-          }
-        }
+    for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) {
+      CallSite CS = CallSites[CSi];
+      
+      Function *Callee = CS.getCalledFunction();
+      // We can only inline direct calls to non-declarations.
+      if (Callee == 0 || Callee->isDeclaration()) continue;
+      
+      // If the policy determines that we should inline this function,
+      // try to do so.
+      if (!shouldInline(CS))
+        continue;
+
+      Function *Caller = CS.getCaller();
+      // Attempt to inline the function...
+      if (!InlineCallIfPossible(CS, CG, TD, InlinedArrayAllocas))
+        continue;
+      
+      // If we inlined the last possible call site to the function, delete the
+      // function body now.
+      if (Callee->use_empty() && Callee->hasLocalLinkage() &&
+          // TODO: Can remove if in SCC now.
+          !SCCFunctions.count(Callee) &&
+          
+          // The function may be apparently dead, but if there are indirect
+          // callgraph references to the node, we cannot delete it yet, this
+          // could invalidate the CGSCC iterator.
+          CG[Callee]->getNumReferences() == 0) {
+        DEBUG(errs() << "    -> Deleting dead function: "
+              << Callee->getName() << "\n");
+        CallGraphNode *CalleeNode = CG[Callee];
+        
+        // Remove any call graph edges from the callee to its callees.
+        CalleeNode->removeAllCalledFunctions();
+        
+        resetCachedCostInfo(Callee);
+        
+        // Removing the node for callee from the call graph and delete it.
+        delete CG.removeFunctionFromModule(CalleeNode);
+        ++NumDeleted;
       }
+      
+      // Remove any cached cost info for this caller, as inlining the
+      // callee has increased the size of the caller (which may be the
+      // same as the callee).
+      resetCachedCostInfo(Caller);
+
+      // Remove this call site from the list.  If possible, use 
+      // swap/pop_back for efficiency, but do not use it if doing so would
+      // move a call site to a function in this SCC before the
+      // 'FirstCallInSCC' barrier.
+      if (SCC.size() == 1) {
+        std::swap(CallSites[CSi], CallSites.back());
+        CallSites.pop_back();
+      } else {
+        CallSites.erase(CallSites.begin()+CSi);
+      }
+      --CSi;
+
+      ++NumInlined;
+      Changed = true;
+      LocalChange = true;
+    }
   } while (LocalChange);
 
   return Changed;
@@ -227,47 +406,55 @@ bool Inliner::doFinalization(CallGraph &CG) {
   return removeDeadFunctions(CG);
 }
 
-  /// removeDeadFunctions - Remove dead functions that are not included in
-  /// DNR (Do Not Remove) list.
+/// removeDeadFunctions - Remove dead functions that are not included in
+/// DNR (Do Not Remove) list.
 bool Inliner::removeDeadFunctions(CallGraph &CG, 
-                                 SmallPtrSet<const Function *, 16> *DNR) {
-  std::set<CallGraphNode*> FunctionsToRemove;
+                                  SmallPtrSet<const Function *, 16> *DNR) {
+  SmallPtrSet<CallGraphNode*, 16> FunctionsToRemove;
 
   // Scan for all of the functions, looking for ones that should now be removed
   // from the program.  Insert the dead ones in the FunctionsToRemove set.
   for (CallGraph::iterator I = CG.begin(), E = CG.end(); I != E; ++I) {
     CallGraphNode *CGN = I->second;
-    if (Function *F = CGN ? CGN->getFunction() : 0) {
-      // If the only remaining users of the function are dead constants, remove
-      // them.
-      F->removeDeadConstantUsers();
-
-      if (DNR && DNR->count(F))
-        continue;
+    if (CGN->getFunction() == 0)
+      continue;
+    
+    Function *F = CGN->getFunction();
+    
+    // If the only remaining users of the function are dead constants, remove
+    // them.
+    F->removeDeadConstantUsers();
+
+    if (DNR && DNR->count(F))
+      continue;
+    if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() &&
+        !F->hasAvailableExternallyLinkage())
+      continue;
+    if (!F->use_empty())
+      continue;
+    
+    // Remove any call graph edges from the function to its callees.
+    CGN->removeAllCalledFunctions();
+
+    // Remove any edges from the external node to the function's call graph
+    // node.  These edges might have been made irrelegant due to
+    // optimization of the program.
+    CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN);
 
-      if ((F->hasLinkOnceLinkage() || F->hasLocalLinkage()) &&
-          F->use_empty()) {
-
-        // Remove any call graph edges from the function to its callees.
-        CGN->removeAllCalledFunctions();
-
-        // Remove any edges from the external node to the function's call graph
-        // node.  These edges might have been made irrelegant due to
-        // optimization of the program.
-        CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN);
-
-        // Removing the node for callee from the call graph and delete it.
-        FunctionsToRemove.insert(CGN);
-      }
-    }
+    // Removing the node for callee from the call graph and delete it.
+    FunctionsToRemove.insert(CGN);
   }
 
   // Now that we know which functions to delete, do so.  We didn't want to do
   // this inline, because that would invalidate our CallGraph::iterator
   // objects. :(
+  //
+  // Note that it doesn't matter that we are iterating over a non-stable set
+  // here to do this, it doesn't matter which order the functions are deleted
+  // in.
   bool Changed = false;
-  for (std::set<CallGraphNode*>::iterator I = FunctionsToRemove.begin(),
-         E = FunctionsToRemove.end(); I != E; ++I) {
+  for (SmallPtrSet<CallGraphNode*, 16>::iterator I = FunctionsToRemove.begin(),
+       E = FunctionsToRemove.end(); I != E; ++I) {
     resetCachedCostInfo((*I)->getFunction());
     delete CG.removeFunctionFromModule(*I);
     ++NumDeleted;
diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp
index 5093ae9..e3c3c67 100644
--- a/lib/Transforms/IPO/Internalize.cpp
+++ b/lib/Transforms/IPO/Internalize.cpp
@@ -21,6 +21,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/Statistic.h"
 #include <fstream>
 #include <set>
@@ -86,7 +87,7 @@ void InternalizePass::LoadFile(const char *Filename) {
   // Load the APIFile...
   std::ifstream In(Filename);
   if (!In.good()) {
-    cerr << "WARNING: Internalize couldn't load file '" << Filename
+    errs() << "WARNING: Internalize couldn't load file '" << Filename
          << "'! Continuing as if it's empty.\n";
     return; // Just continue as if the file were empty
   }
@@ -101,7 +102,7 @@ void InternalizePass::LoadFile(const char *Filename) {
 bool InternalizePass::runOnModule(Module &M) {
   CallGraph *CG = getAnalysisIfAvailable<CallGraph>();
   CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : 0;
-
+  
   if (ExternalNames.empty()) {
     // Return if we're not in 'all but main' mode and have no external api
     if (!AllButMain)
@@ -131,12 +132,14 @@ bool InternalizePass::runOnModule(Module &M) {
       if (ExternalNode) ExternalNode->removeOneAbstractEdgeTo((*CG)[I]);
       Changed = true;
       ++NumFunctions;
-      DOUT << "Internalizing func " << I->getName() << "\n";
+      DEBUG(errs() << "Internalizing func " << I->getName() << "\n");
     }
 
   // Never internalize the llvm.used symbol.  It is used to implement
   // attribute((used)).
+  // FIXME: Shouldn't this just filter on llvm.metadata section??
   ExternalNames.insert("llvm.used");
+  ExternalNames.insert("llvm.compiler.used");
 
   // Never internalize anchors used by the machine module info, else the info
   // won't find them.  (see MachineModuleInfo.)
@@ -158,7 +161,7 @@ bool InternalizePass::runOnModule(Module &M) {
       I->setLinkage(GlobalValue::InternalLinkage);
       Changed = true;
       ++NumGlobals;
-      DOUT << "Internalized gvar " << I->getName() << "\n";
+      DEBUG(errs() << "Internalized gvar " << I->getName() << "\n");
     }
 
   // Mark all aliases that are not in the api as internal as well.
@@ -169,7 +172,7 @@ bool InternalizePass::runOnModule(Module &M) {
       I->setLinkage(GlobalValue::InternalLinkage);
       Changed = true;
       ++NumAliases;
-      DOUT << "Internalized alias " << I->getName() << "\n";
+      DEBUG(errs() << "Internalized alias " << I->getName() << "\n");
     }
 
   return Changed;
diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp
index 0c65443..02ac3bb 100644
--- a/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/lib/Transforms/IPO/LoopExtractor.cpp
@@ -20,7 +20,7 @@
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Transforms/Scalar.h"
@@ -33,23 +33,19 @@ using namespace llvm;
 STATISTIC(NumExtracted, "Number of loops extracted");
 
 namespace {
-  // FIXME: This is not a function pass, but the PassManager doesn't allow
-  // Module passes to require FunctionPasses, so we can't get loop info if we're
-  // not a function pass.
-  struct VISIBILITY_HIDDEN LoopExtractor : public FunctionPass {
+  struct VISIBILITY_HIDDEN LoopExtractor : public LoopPass {
     static char ID; // Pass identification, replacement for typeid
     unsigned NumLoops;
 
     explicit LoopExtractor(unsigned numLoops = ~0) 
-      : FunctionPass(&ID), NumLoops(numLoops) {}
+      : LoopPass(&ID), NumLoops(numLoops) {}
 
-    virtual bool runOnFunction(Function &F);
+    virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
 
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.addRequiredID(BreakCriticalEdgesID);
       AU.addRequiredID(LoopSimplifyID);
       AU.addRequired<DominatorTree>();
-      AU.addRequired<LoopInfo>();
     }
   };
 }
@@ -73,68 +69,50 @@ Y("loop-extract-single", "Extract at most one loop into a new function");
 // createLoopExtractorPass - This pass extracts all natural loops from the
 // program into a function if it can.
 //
-FunctionPass *llvm::createLoopExtractorPass() { return new LoopExtractor(); }
+Pass *llvm::createLoopExtractorPass() { return new LoopExtractor(); }
 
-bool LoopExtractor::runOnFunction(Function &F) {
-  LoopInfo &LI = getAnalysis<LoopInfo>();
-
-  // If this function has no loops, there is nothing to do.
-  if (LI.empty())
+bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) {
+  // Only visit top-level loops.
+  if (L->getParentLoop())
     return false;
 
   DominatorTree &DT = getAnalysis<DominatorTree>();
-
-  // If there is more than one top-level loop in this function, extract all of
-  // the loops.
   bool Changed = false;
-  if (LI.end()-LI.begin() > 1) {
-    for (LoopInfo::iterator i = LI.begin(), e = LI.end(); i != e; ++i) {
-      if (NumLoops == 0) return Changed;
-      --NumLoops;
-      Changed |= ExtractLoop(DT, *i) != 0;
-      ++NumExtracted;
-    }
-  } else {
-    // Otherwise there is exactly one top-level loop.  If this function is more
-    // than a minimal wrapper around the loop, extract the loop.
-    Loop *TLL = *LI.begin();
-    bool ShouldExtractLoop = false;
-
-    // Extract the loop if the entry block doesn't branch to the loop header.
-    TerminatorInst *EntryTI = F.getEntryBlock().getTerminator();
-    if (!isa<BranchInst>(EntryTI) ||
-        !cast<BranchInst>(EntryTI)->isUnconditional() ||
-        EntryTI->getSuccessor(0) != TLL->getHeader())
-      ShouldExtractLoop = true;
-    else {
-      // Check to see if any exits from the loop are more than just return
-      // blocks.
-      SmallVector<BasicBlock*, 8> ExitBlocks;
-      TLL->getExitBlocks(ExitBlocks);
-      for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
-        if (!isa<ReturnInst>(ExitBlocks[i]->getTerminator())) {
-          ShouldExtractLoop = true;
-          break;
-        }
-    }
 
-    if (ShouldExtractLoop) {
-      if (NumLoops == 0) return Changed;
-      --NumLoops;
-      Changed |= ExtractLoop(DT, TLL) != 0;
-      ++NumExtracted;
-    } else {
-      // Okay, this function is a minimal container around the specified loop.
-      // If we extract the loop, we will continue to just keep extracting it
-      // infinitely... so don't extract it.  However, if the loop contains any
-      // subloops, extract them.
-      for (Loop::iterator i = TLL->begin(), e = TLL->end(); i != e; ++i) {
-        if (NumLoops == 0) return Changed;
-        --NumLoops;
-        Changed |= ExtractLoop(DT, *i) != 0;
-        ++NumExtracted;
+  // If there is more than one top-level loop in this function, extract all of
+  // the loops. Otherwise there is exactly one top-level loop; in this case if
+  // this function is more than a minimal wrapper around the loop, extract
+  // the loop.
+  bool ShouldExtractLoop = false;
+
+  // Extract the loop if the entry block doesn't branch to the loop header.
+  TerminatorInst *EntryTI =
+    L->getHeader()->getParent()->getEntryBlock().getTerminator();
+  if (!isa<BranchInst>(EntryTI) ||
+      !cast<BranchInst>(EntryTI)->isUnconditional() ||
+      EntryTI->getSuccessor(0) != L->getHeader())
+    ShouldExtractLoop = true;
+  else {
+    // Check to see if any exits from the loop are more than just return
+    // blocks.
+    SmallVector<BasicBlock*, 8> ExitBlocks;
+    L->getExitBlocks(ExitBlocks);
+    for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+      if (!isa<ReturnInst>(ExitBlocks[i]->getTerminator())) {
+        ShouldExtractLoop = true;
+        break;
       }
+  }
+  if (ShouldExtractLoop) {
+    if (NumLoops == 0) return Changed;
+    --NumLoops;
+    if (ExtractLoop(DT, L) != 0) {
+      Changed = true;
+      // After extraction, the loop is replaced by a function call, so
+      // we shouldn't try to run any more loop passes on it.
+      LPM.deleteLoopFromQueue(L);
     }
+    ++NumExtracted;
   }
 
   return Changed;
@@ -143,7 +121,7 @@ bool LoopExtractor::runOnFunction(Function &F) {
 // createSingleLoopExtractorPass - This pass extracts one natural loop from the
 // program into a function if it can.  This is used by bugpoint.
 //
-FunctionPass *llvm::createSingleLoopExtractorPass() {
+Pass *llvm::createSingleLoopExtractorPass() {
   return new SingleLoopExtractor();
 }
 
@@ -193,8 +171,8 @@ void BlockExtractorPass::LoadFile(const char *Filename) {
   // Load the BlockFile...
   std::ifstream In(Filename);
   if (!In.good()) {
-    cerr << "WARNING: BlockExtractor couldn't load file '" << Filename
-         << "'!\n";
+    errs() << "WARNING: BlockExtractor couldn't load file '" << Filename
+           << "'!\n";
     return;
   }
   while (In) {
diff --git a/lib/Transforms/IPO/LowerSetJmp.cpp b/lib/Transforms/IPO/LowerSetJmp.cpp
index dfc040b..55194b3 100644
--- a/lib/Transforms/IPO/LowerSetJmp.cpp
+++ b/lib/Transforms/IPO/LowerSetJmp.cpp
@@ -39,6 +39,7 @@
 #include "llvm/DerivedTypes.h"
 #include "llvm/Instructions.h"
 #include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CFG.h"
@@ -200,7 +201,7 @@ bool LowerSetJmp::runOnModule(Module& M) {
 // This function is always successful, unless it isn't.
 bool LowerSetJmp::doInitialization(Module& M)
 {
-  const Type *SBPTy = PointerType::getUnqual(Type::Int8Ty);
+  const Type *SBPTy = Type::getInt8PtrTy(M.getContext());
   const Type *SBPPTy = PointerType::getUnqual(SBPTy);
 
   // N.B. See llvm/runtime/GCCLibraries/libexception/SJLJ-Exception.h for
@@ -208,33 +209,40 @@ bool LowerSetJmp::doInitialization(Module& M)
 
   // void __llvm_sjljeh_init_setjmpmap(void**)
   InitSJMap = M.getOrInsertFunction("__llvm_sjljeh_init_setjmpmap",
-                                    Type::VoidTy, SBPPTy, (Type *)0);
+                                    Type::getVoidTy(M.getContext()),
+                                    SBPPTy, (Type *)0);
   // void __llvm_sjljeh_destroy_setjmpmap(void**)
   DestroySJMap = M.getOrInsertFunction("__llvm_sjljeh_destroy_setjmpmap",
-                                       Type::VoidTy, SBPPTy, (Type *)0);
+                                       Type::getVoidTy(M.getContext()),
+                                       SBPPTy, (Type *)0);
 
   // void __llvm_sjljeh_add_setjmp_to_map(void**, void*, unsigned)
   AddSJToMap = M.getOrInsertFunction("__llvm_sjljeh_add_setjmp_to_map",
-                                     Type::VoidTy, SBPPTy, SBPTy,
-                                     Type::Int32Ty, (Type *)0);
+                                     Type::getVoidTy(M.getContext()),
+                                     SBPPTy, SBPTy,
+                                     Type::getInt32Ty(M.getContext()),
+                                     (Type *)0);
 
   // void __llvm_sjljeh_throw_longjmp(int*, int)
   ThrowLongJmp = M.getOrInsertFunction("__llvm_sjljeh_throw_longjmp",
-                                       Type::VoidTy, SBPTy, Type::Int32Ty,
+                                       Type::getVoidTy(M.getContext()), SBPTy, 
+                                       Type::getInt32Ty(M.getContext()),
                                        (Type *)0);
 
   // unsigned __llvm_sjljeh_try_catching_longjmp_exception(void **)
   TryCatchLJ =
     M.getOrInsertFunction("__llvm_sjljeh_try_catching_longjmp_exception",
-                          Type::Int32Ty, SBPPTy, (Type *)0);
+                          Type::getInt32Ty(M.getContext()), SBPPTy, (Type *)0);
 
   // bool __llvm_sjljeh_is_longjmp_exception()
   IsLJException = M.getOrInsertFunction("__llvm_sjljeh_is_longjmp_exception",
-                                        Type::Int1Ty, (Type *)0);
+                                        Type::getInt1Ty(M.getContext()),
+                                        (Type *)0);
 
   // int __llvm_sjljeh_get_longjmp_value()
   GetLJValue = M.getOrInsertFunction("__llvm_sjljeh_get_longjmp_value",
-                                     Type::Int32Ty, (Type *)0);
+                                     Type::getInt32Ty(M.getContext()),
+                                     (Type *)0);
   return true;
 }
 
@@ -257,7 +265,8 @@ bool LowerSetJmp::IsTransformableFunction(const std::string& Name) {
 // throwing the exception for us.
 void LowerSetJmp::TransformLongJmpCall(CallInst* Inst)
 {
-  const Type* SBPTy = PointerType::getUnqual(Type::Int8Ty);
+  const Type* SBPTy =
+        Type::getInt8PtrTy(Inst->getContext());
 
   // Create the call to "__llvm_sjljeh_throw_longjmp". This takes the
   // same parameters as "longjmp", except that the buffer is cast to a
@@ -278,7 +287,7 @@ void LowerSetJmp::TransformLongJmpCall(CallInst* Inst)
   if (SVP.first)
     BranchInst::Create(SVP.first->getParent(), Inst);
   else
-    new UnwindInst(Inst);
+    new UnwindInst(Inst->getContext(), Inst);
 
   // Remove all insts after the branch/unwind inst.  Go from back to front to
   // avoid replaceAllUsesWith if possible.
@@ -309,7 +318,8 @@ AllocaInst* LowerSetJmp::GetSetJmpMap(Function* Func)
   assert(Inst && "Couldn't find even ONE instruction in entry block!");
 
   // Fill in the alloca and call to initialize the SJ map.
-  const Type *SBPTy = PointerType::getUnqual(Type::Int8Ty);
+  const Type *SBPTy =
+        Type::getInt8PtrTy(Func->getContext());
   AllocaInst* Map = new AllocaInst(SBPTy, 0, "SJMap", Inst);
   CallInst::Create(InitSJMap, Map, "", Inst);
   return SJMap[Func] = Map;
@@ -324,12 +334,13 @@ BasicBlock* LowerSetJmp::GetRethrowBB(Function* Func)
 
   // The basic block we're going to jump to if we need to rethrow the
   // exception.
-  BasicBlock* Rethrow = BasicBlock::Create("RethrowExcept", Func);
+  BasicBlock* Rethrow =
+        BasicBlock::Create(Func->getContext(), "RethrowExcept", Func);
 
   // Fill in the "Rethrow" BB with a call to rethrow the exception. This
   // is the last instruction in the BB since at this point the runtime
   // should exit this function and go to the next function.
-  new UnwindInst(Rethrow);
+  new UnwindInst(Func->getContext(), Rethrow);
   return RethrowBBMap[Func] = Rethrow;
 }
 
@@ -340,7 +351,8 @@ LowerSetJmp::SwitchValuePair LowerSetJmp::GetSJSwitch(Function* Func,
 {
   if (SwitchValMap[Func].first) return SwitchValMap[Func];
 
-  BasicBlock* LongJmpPre = BasicBlock::Create("LongJmpBlkPre", Func);
+  BasicBlock* LongJmpPre =
+        BasicBlock::Create(Func->getContext(), "LongJmpBlkPre", Func);
 
   // Keep track of the preliminary basic block for some of the other
   // transformations.
@@ -352,7 +364,8 @@ LowerSetJmp::SwitchValuePair LowerSetJmp::GetSJSwitch(Function* Func,
   // The "decision basic block" gets the number associated with the
   // setjmp call returning to switch on and the value returned by
   // longjmp.
-  BasicBlock* DecisionBB = BasicBlock::Create("LJDecisionBB", Func);
+  BasicBlock* DecisionBB =
+        BasicBlock::Create(Func->getContext(), "LJDecisionBB", Func);
 
   BranchInst::Create(DecisionBB, Rethrow, Cond, LongJmpPre);
 
@@ -375,12 +388,13 @@ void LowerSetJmp::TransformSetJmpCall(CallInst* Inst)
   Function* Func = ABlock->getParent();
 
   // Add this setjmp to the setjmp map.
-  const Type* SBPTy = PointerType::getUnqual(Type::Int8Ty);
+  const Type* SBPTy =
+          Type::getInt8PtrTy(Inst->getContext());
   CastInst* BufPtr = 
     new BitCastInst(Inst->getOperand(1), SBPTy, "SBJmpBuf", Inst);
   std::vector<Value*> Args = 
     make_vector<Value*>(GetSetJmpMap(Func), BufPtr,
-                        ConstantInt::get(Type::Int32Ty,
+                        ConstantInt::get(Type::getInt32Ty(Inst->getContext()),
                                          SetJmpIDMap[Func]++), 0);
   CallInst::Create(AddSJToMap, Args.begin(), Args.end(), "", Inst);
 
@@ -424,14 +438,17 @@ void LowerSetJmp::TransformSetJmpCall(CallInst* Inst)
 
   // This PHI node will be in the new block created from the
   // splitBasicBlock call.
-  PHINode* PHI = PHINode::Create(Type::Int32Ty, "SetJmpReturn", Inst);
+  PHINode* PHI = PHINode::Create(Type::getInt32Ty(Inst->getContext()),
+                                 "SetJmpReturn", Inst);
 
   // Coming from a call to setjmp, the return is 0.
-  PHI->addIncoming(ConstantInt::getNullValue(Type::Int32Ty), ABlock);
+  PHI->addIncoming(Constant::getNullValue(Type::getInt32Ty(Inst->getContext())),
+                   ABlock);
 
   // Add the case for this setjmp's number...
   SwitchValuePair SVP = GetSJSwitch(Func, GetRethrowBB(Func));
-  SVP.first->addCase(ConstantInt::get(Type::Int32Ty, SetJmpIDMap[Func] - 1),
+  SVP.first->addCase(ConstantInt::get(Type::getInt32Ty(Inst->getContext()),
+                                      SetJmpIDMap[Func] - 1),
                      SetJmpContBlock);
 
   // Value coming from the handling of the exception.
@@ -503,7 +520,8 @@ void LowerSetJmp::visitInvokeInst(InvokeInst& II)
   BasicBlock* ExceptBB = II.getUnwindDest();
 
   Function* Func = BB->getParent();
-  BasicBlock* NewExceptBB = BasicBlock::Create("InvokeExcept", Func);
+  BasicBlock* NewExceptBB = BasicBlock::Create(II.getContext(), 
+                                               "InvokeExcept", Func);
 
   // If this is a longjmp exception, then branch to the preliminary BB of
   // the longjmp exception handling. Otherwise, go to the old exception.
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index 5693cc0..13bbf9c 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -47,11 +47,14 @@
 #include "llvm/Constants.h"
 #include "llvm/InlineAsm.h"
 #include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
 #include <map>
 #include <vector>
 using namespace llvm;
@@ -61,7 +64,7 @@ STATISTIC(NumFunctionsMerged, "Number of functions merged");
 namespace {
   struct VISIBILITY_HIDDEN MergeFunctions : public ModulePass {
     static char ID; // Pass identification, replacement for typeid
-    MergeFunctions() : ModulePass((intptr_t)&ID) {}
+    MergeFunctions() : ModulePass(&ID) {}
 
     bool runOnModule(Module &M);
   };
@@ -127,7 +130,7 @@ static bool isEquivalentType(const Type *Ty1, const Type *Ty2) {
     return false;
 
   default:
-    assert(0 && "Unknown type!");
+    llvm_unreachable("Unknown type!");
     return false;
 
   case Type::PointerTyID: {
@@ -185,7 +188,8 @@ static bool
 isEquivalentOperation(const Instruction *I1, const Instruction *I2) {
   if (I1->getOpcode() != I2->getOpcode() ||
       I1->getNumOperands() != I2->getNumOperands() ||
-      !isEquivalentType(I1->getType(), I2->getType()))
+      !isEquivalentType(I1->getType(), I2->getType()) ||
+      !I1->hasSameSubclassOptionalData(I2))
     return false;
 
   // We have two instructions of identical opcode and #operands.  Check to see
@@ -449,6 +453,7 @@ static LinkageCategory categorize(const Function *F) {
   switch (F->getLinkage()) {
   case GlobalValue::InternalLinkage:
   case GlobalValue::PrivateLinkage:
+  case GlobalValue::LinkerPrivateLinkage:
     return Internal;
 
   case GlobalValue::WeakAnyLinkage:
@@ -468,14 +473,14 @@ static LinkageCategory categorize(const Function *F) {
     return ExternalStrong;
   }
 
-  assert(0 && "Unknown LinkageType.");
+  llvm_unreachable("Unknown LinkageType.");
   return ExternalWeak;
 }
 
 static void ThunkGToF(Function *F, Function *G) {
   Function *NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "",
                                     G->getParent());
-  BasicBlock *BB = BasicBlock::Create("", NewG);
+  BasicBlock *BB = BasicBlock::Create(F->getContext(), "", NewG);
 
   std::vector<Value *> Args;
   unsigned i = 0;
@@ -494,13 +499,13 @@ static void ThunkGToF(Function *F, Function *G) {
   CallInst *CI = CallInst::Create(F, Args.begin(), Args.end(), "", BB);
   CI->setTailCall();
   CI->setCallingConv(F->getCallingConv());
-  if (NewG->getReturnType() == Type::VoidTy) {
-    ReturnInst::Create(BB);
+  if (NewG->getReturnType() == Type::getVoidTy(F->getContext())) {
+    ReturnInst::Create(F->getContext(), BB);
   } else if (CI->getType() != NewG->getReturnType()) {
     Value *BCI = new BitCastInst(CI, NewG->getReturnType(), "", BB);
-    ReturnInst::Create(BCI, BB);
+    ReturnInst::Create(F->getContext(), BCI, BB);
   } else {
-    ReturnInst::Create(CI, BB);
+    ReturnInst::Create(F->getContext(), CI, BB);
   }
 
   NewG->copyAttributesFrom(G);
@@ -574,22 +579,22 @@ static bool fold(std::vector<Function *> &FnVec, unsigned i, unsigned j) {
     case Internal:
       switch (catG) {
         case ExternalStrong:
-          assert(0);
+          llvm_unreachable(0);
           // fall-through
         case ExternalWeak:
-	  if (F->hasAddressTaken())
+          if (F->hasAddressTaken())
             ThunkGToF(F, G);
           else
             AliasGToF(F, G);
-	  break;
+          break;
         case Internal: {
           bool addrTakenF = F->hasAddressTaken();
           bool addrTakenG = G->hasAddressTaken();
           if (!addrTakenF && addrTakenG) {
             std::swap(FnVec[i], FnVec[j]);
             std::swap(F, G);
-	    std::swap(addrTakenF, addrTakenG);
-	  }
+            std::swap(addrTakenF, addrTakenG);
+          }
 
           if (addrTakenF && addrTakenG) {
             ThunkGToF(F, G);
@@ -597,7 +602,7 @@ static bool fold(std::vector<Function *> &FnVec, unsigned i, unsigned j) {
             assert(!addrTakenG);
             AliasGToF(F, G);
           }
-	} break;
+        } break;
       }
       break;
   }
@@ -629,19 +634,19 @@ bool MergeFunctions::runOnModule(Module &M) {
   bool LocalChanged;
   do {
     LocalChanged = false;
-    DOUT << "size: " << FnMap.size() << "\n";
+    DEBUG(errs() << "size: " << FnMap.size() << "\n");
     for (std::map<unsigned long, std::vector<Function *> >::iterator
          I = FnMap.begin(), E = FnMap.end(); I != E; ++I) {
       std::vector<Function *> &FnVec = I->second;
-      DOUT << "hash (" << I->first << "): " << FnVec.size() << "\n";
+      DEBUG(errs() << "hash (" << I->first << "): " << FnVec.size() << "\n");
 
       for (int i = 0, e = FnVec.size(); i != e; ++i) {
         for (int j = i + 1; j != e; ++j) {
           bool isEqual = equals(FnVec[i], FnVec[j]);
 
-          DOUT << "  " << FnVec[i]->getName()
-               << (isEqual ? " == " : " != ")
-               << FnVec[j]->getName() << "\n";
+          DEBUG(errs() << "  " << FnVec[i]->getName()
+                << (isEqual ? " == " : " != ")
+                << FnVec[j]->getName() << "\n");
 
           if (isEqual) {
             if (fold(FnVec, i, j)) {
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index 73ec9c1..8f858d3 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -48,7 +48,8 @@ ModulePass* llvm::createPartialInliningPass() { return new PartialInliner(); }
 Function* PartialInliner::unswitchFunction(Function* F) {
   // First, verify that this function is an unswitching candidate...
   BasicBlock* entryBlock = F->begin();
-  if (!isa<BranchInst>(entryBlock->getTerminator()))
+  BranchInst *BR = dyn_cast<BranchInst>(entryBlock->getTerminator());
+  if (!BR || BR->isUnconditional())
     return 0;
   
   BasicBlock* returnBlock = 0;
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index 2b52f46..daf81e9 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -19,6 +19,7 @@
 #include "llvm/CallGraphSCCPass.h"
 #include "llvm/Constants.h"
 #include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Analysis/CallGraph.h"
@@ -40,7 +41,7 @@ namespace {
     PruneEH() : CallGraphSCCPass(&ID) {}
 
     // runOnSCC - Analyze the SCC, performing the transformation if possible.
-    bool runOnSCC(const std::vector<CallGraphNode *> &SCC);
+    bool runOnSCC(std::vector<CallGraphNode *> &SCC);
 
     bool SimplifyFunction(Function *F);
     void DeleteBasicBlock(BasicBlock *BB);
@@ -54,7 +55,7 @@ X("prune-eh", "Remove unused exception handling info");
 Pass *llvm::createPruneEHPass() { return new PruneEH(); }
 
 
-bool PruneEH::runOnSCC(const std::vector<CallGraphNode *> &SCC) {
+bool PruneEH::runOnSCC(std::vector<CallGraphNode *> &SCC) {
   SmallPtrSet<CallGraphNode *, 8> SCCNodes;
   CallGraph &CG = getAnalysis<CallGraph>();
   bool MadeChange = false;
@@ -164,9 +165,6 @@ bool PruneEH::runOnSCC(const std::vector<CallGraphNode *> &SCC) {
 // function if we have invokes to non-unwinding functions or code after calls to
 // no-return functions.
 bool PruneEH::SimplifyFunction(Function *F) {
-  CallGraph &CG = getAnalysis<CallGraph>();
-  CallGraphNode *CGN = CG[F];
-
   bool MadeChange = false;
   for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
     if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
@@ -180,14 +178,13 @@ bool PruneEH::SimplifyFunction(Function *F) {
         Call->setAttributes(II->getAttributes());
 
         // Anything that used the value produced by the invoke instruction
-        // now uses the value produced by the call instruction.
+        // now uses the value produced by the call instruction.  Note that we
+        // do this even for void functions and calls with no uses so that the
+        // callgraph edge is updated.
         II->replaceAllUsesWith(Call);
         BasicBlock *UnwindBlock = II->getUnwindDest();
         UnwindBlock->removePredecessor(II->getParent());
 
-        // Fix up the call graph.
-        CGN->replaceCallSite(II, Call);
-
         // Insert a branch to the normal destination right before the
         // invoke.
         BranchInst::Create(II->getNormalDest(), II);
@@ -214,7 +211,7 @@ bool PruneEH::SimplifyFunction(Function *F) {
 
           // Remove the uncond branch and add an unreachable.
           BB->getInstList().pop_back();
-          new UnreachableInst(BB);
+          new UnreachableInst(BB->getContext(), BB);
 
           DeleteBasicBlock(New);  // Delete the new BB.
           MadeChange = true;
diff --git a/lib/Transforms/IPO/RaiseAllocations.cpp b/lib/Transforms/IPO/RaiseAllocations.cpp
index 9900368..4c1f26d 100644
--- a/lib/Transforms/IPO/RaiseAllocations.cpp
+++ b/lib/Transforms/IPO/RaiseAllocations.cpp
@@ -16,6 +16,7 @@
 #include "llvm/Transforms/IPO.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "llvm/Instructions.h"
 #include "llvm/Pass.h"
@@ -69,7 +70,6 @@ ModulePass *llvm::createRaiseAllocationsPass() {
 // function into the appropriate instruction.
 //
 void RaiseAllocations::doInitialization(Module &M) {
-
   // Get Malloc and free prototypes if they exist!
   MallocFunc = M.getFunction("malloc");
   if (MallocFunc) {
@@ -77,22 +77,27 @@ void RaiseAllocations::doInitialization(Module &M) {
 
     // Get the expected prototype for malloc
     const FunctionType *Malloc1Type = 
-      FunctionType::get(PointerType::getUnqual(Type::Int8Ty),
-                      std::vector<const Type*>(1, Type::Int64Ty), false);
+      FunctionType::get(Type::getInt8PtrTy(M.getContext()),
+                      std::vector<const Type*>(1,
+                                      Type::getInt64Ty(M.getContext())), false);
 
     // Chck to see if we got the expected malloc
     if (TyWeHave != Malloc1Type) {
       // Check to see if the prototype is wrong, giving us i8*(i32) * malloc
       // This handles the common declaration of: 'void *malloc(unsigned);'
       const FunctionType *Malloc2Type = 
-        FunctionType::get(PointerType::getUnqual(Type::Int8Ty),
-                          std::vector<const Type*>(1, Type::Int32Ty), false);
+        FunctionType::get(PointerType::getUnqual(
+                          Type::getInt8Ty(M.getContext())),
+                          std::vector<const Type*>(1, 
+                                      Type::getInt32Ty(M.getContext())), false);
       if (TyWeHave != Malloc2Type) {
         // Check to see if the prototype is missing, giving us 
         // i8*(...) * malloc
         // This handles the common declaration of: 'void *malloc();'
         const FunctionType *Malloc3Type = 
-          FunctionType::get(PointerType::getUnqual(Type::Int8Ty), true);
+          FunctionType::get(PointerType::getUnqual(
+                                    Type::getInt8Ty(M.getContext())), 
+                                    true);
         if (TyWeHave != Malloc3Type)
           // Give up
           MallocFunc = 0;
@@ -105,19 +110,24 @@ void RaiseAllocations::doInitialization(Module &M) {
     const FunctionType* TyWeHave = FreeFunc->getFunctionType();
     
     // Get the expected prototype for void free(i8*)
-    const FunctionType *Free1Type = FunctionType::get(Type::VoidTy,
-      std::vector<const Type*>(1, PointerType::getUnqual(Type::Int8Ty)), false);
+    const FunctionType *Free1Type =
+      FunctionType::get(Type::getVoidTy(M.getContext()),
+        std::vector<const Type*>(1, PointerType::getUnqual(
+                                 Type::getInt8Ty(M.getContext()))), 
+                                 false);
 
     if (TyWeHave != Free1Type) {
       // Check to see if the prototype was forgotten, giving us 
       // void (...) * free
       // This handles the common forward declaration of: 'void free();'
-      const FunctionType* Free2Type = FunctionType::get(Type::VoidTy, true);
+      const FunctionType* Free2Type =
+                    FunctionType::get(Type::getVoidTy(M.getContext()), true);
 
       if (TyWeHave != Free2Type) {
         // One last try, check to see if we can find free as 
         // int (...)* free.  This handles the case where NOTHING was declared.
-        const FunctionType* Free3Type = FunctionType::get(Type::Int32Ty, true);
+        const FunctionType* Free3Type =
+                    FunctionType::get(Type::getInt32Ty(M.getContext()), true);
         
         if (TyWeHave != Free3Type) {
           // Give up.
@@ -137,7 +147,7 @@ void RaiseAllocations::doInitialization(Module &M) {
 bool RaiseAllocations::runOnModule(Module &M) {
   // Find the malloc/free prototypes...
   doInitialization(M);
-
+  
   bool Changed = false;
 
   // First, process all of the malloc calls...
@@ -159,12 +169,15 @@ bool RaiseAllocations::runOnModule(Module &M) {
 
           // If no prototype was provided for malloc, we may need to cast the
           // source size.
-          if (Source->getType() != Type::Int32Ty)
+          if (Source->getType() != Type::getInt32Ty(M.getContext()))
             Source = 
-              CastInst::CreateIntegerCast(Source, Type::Int32Ty, false/*ZExt*/,
+              CastInst::CreateIntegerCast(Source, 
+                                          Type::getInt32Ty(M.getContext()), 
+                                          false/*ZExt*/,
                                           "MallocAmtCast", I);
 
-          MallocInst *MI = new MallocInst(Type::Int8Ty, Source, "", I);
+          MallocInst *MI = new MallocInst(Type::getInt8Ty(M.getContext()),
+                                          Source, "", I);
           MI->takeName(I);
           I->replaceAllUsesWith(MI);
 
@@ -216,7 +229,7 @@ bool RaiseAllocations::runOnModule(Module &M) {
           Value *Source = *CS.arg_begin();
           if (!isa<PointerType>(Source->getType()))
             Source = new IntToPtrInst(Source,           
-                                      PointerType::getUnqual(Type::Int8Ty), 
+                        Type::getInt8PtrTy(M.getContext()), 
                                       "FreePtrCast", I);
           new FreeInst(Source, I);
 
@@ -226,7 +239,7 @@ bool RaiseAllocations::runOnModule(Module &M) {
             BranchInst::Create(II->getNormalDest(), I);
 
           // Delete the old call site
-          if (I->getType() != Type::VoidTy)
+          if (I->getType() != Type::getVoidTy(M.getContext()))
             I->replaceAllUsesWith(UndefValue::get(I->getType()));
           I->eraseFromParent();
           Changed = true;
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index 046e044..77d44b2 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -24,18 +24,18 @@
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/Analysis/DebugInfo.h"
 #include "llvm/ValueSymbolTable.h"
 #include "llvm/TypeSymbolTable.h"
 #include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Support/Compiler.h"
 #include "llvm/ADT/SmallPtrSet.h"
 using namespace llvm;
 
 namespace {
-  class VISIBILITY_HIDDEN StripSymbols : public ModulePass {
+  class StripSymbols : public ModulePass {
     bool OnlyDebugInfo;
   public:
     static char ID; // Pass identification, replacement for typeid
@@ -49,7 +49,7 @@ namespace {
     }
   };
 
-  class VISIBILITY_HIDDEN StripNonDebugSymbols : public ModulePass {
+  class StripNonDebugSymbols : public ModulePass {
   public:
     static char ID; // Pass identification, replacement for typeid
     explicit StripNonDebugSymbols()
@@ -62,7 +62,7 @@ namespace {
     }
   };
 
-  class VISIBILITY_HIDDEN StripDebugDeclare : public ModulePass {
+  class StripDebugDeclare : public ModulePass {
   public:
     static char ID; // Pass identification, replacement for typeid
     explicit StripDebugDeclare()
@@ -138,7 +138,7 @@ static void StripSymtab(ValueSymbolTable &ST, bool PreserveDbgInfo) {
     Value *V = VI->getValue();
     ++VI;
     if (!isa<GlobalValue>(V) || cast<GlobalValue>(V)->hasLocalLinkage()) {
-      if (!PreserveDbgInfo || strncmp(V->getNameStart(), "llvm.dbg", 8))
+      if (!PreserveDbgInfo || !V->getName().startswith("llvm.dbg"))
         // Set name to "", removing from symbol table!
         V->setName("");
     }
@@ -156,43 +156,37 @@ static void StripTypeSymtab(TypeSymbolTable &ST, bool PreserveDbgInfo) {
 }
 
 /// Find values that are marked as llvm.used.
-void findUsedValues(Module &M,
-                    SmallPtrSet<const GlobalValue*, 8>& llvmUsedValues) {
-  if (GlobalVariable *LLVMUsed = M.getGlobalVariable("llvm.used")) {
-    llvmUsedValues.insert(LLVMUsed);
-    // Collect values that are preserved as per explicit request.
-    // llvm.used is used to list these values.
-    if (ConstantArray *Inits = 
-        dyn_cast<ConstantArray>(LLVMUsed->getInitializer())) {
-      for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) {
-        if (GlobalValue *GV = dyn_cast<GlobalValue>(Inits->getOperand(i)))
-          llvmUsedValues.insert(GV);
-        else if (ConstantExpr *CE =
-                 dyn_cast<ConstantExpr>(Inits->getOperand(i)))
-          if (CE->getOpcode() == Instruction::BitCast)
-            if (GlobalValue *GV = dyn_cast<GlobalValue>(CE->getOperand(0)))
-              llvmUsedValues.insert(GV);
-      }
-    }
-  }
+static void findUsedValues(GlobalVariable *LLVMUsed,
+                           SmallPtrSet<const GlobalValue*, 8> &UsedValues) {
+  if (LLVMUsed == 0) return;
+  UsedValues.insert(LLVMUsed);
+  
+  ConstantArray *Inits = dyn_cast<ConstantArray>(LLVMUsed->getInitializer());
+  if (Inits == 0) return;
+  
+  for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i)
+    if (GlobalValue *GV = 
+          dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts()))
+      UsedValues.insert(GV);
 }
 
 /// StripSymbolNames - Strip symbol names.
-bool StripSymbolNames(Module &M, bool PreserveDbgInfo) {
+static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) {
 
   SmallPtrSet<const GlobalValue*, 8> llvmUsedValues;
-  findUsedValues(M, llvmUsedValues);
+  findUsedValues(M.getGlobalVariable("llvm.used"), llvmUsedValues);
+  findUsedValues(M.getGlobalVariable("llvm.compiler.used"), llvmUsedValues);
 
   for (Module::global_iterator I = M.global_begin(), E = M.global_end();
        I != E; ++I) {
     if (I->hasLocalLinkage() && llvmUsedValues.count(I) == 0)
-      if (!PreserveDbgInfo || strncmp(I->getNameStart(), "llvm.dbg", 8))
+      if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg"))
         I->setName("");     // Internal symbols can't participate in linkage
   }
   
   for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
     if (I->hasLocalLinkage() && llvmUsedValues.count(I) == 0)
-      if (!PreserveDbgInfo || strncmp(I->getNameStart(), "llvm.dbg", 8))
+      if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg"))
         I->setName("");     // Internal symbols can't participate in linkage
     StripSymtab(I->getValueSymbolTable(), PreserveDbgInfo);
   }
@@ -206,169 +200,58 @@ bool StripSymbolNames(Module &M, bool PreserveDbgInfo) {
 // StripDebugInfo - Strip debug info in the module if it exists.  
 // To do this, we remove llvm.dbg.func.start, llvm.dbg.stoppoint, and 
 // llvm.dbg.region.end calls, and any globals they point to if now dead.
-bool StripDebugInfo(Module &M) {
-
-  SmallPtrSet<const GlobalValue*, 8> llvmUsedValues;
-  findUsedValues(M, llvmUsedValues);
-
-  SmallVector<GlobalVariable *, 2> CUs;
-  SmallVector<GlobalVariable *, 4> GVs;
-  SmallVector<GlobalVariable *, 4> SPs;
-  CollectDebugInfoAnchors(M, CUs, GVs, SPs);
-  // These anchors use LinkOnce linkage so that the optimizer does not
-  // remove them accidently. Set InternalLinkage for all these debug
-  // info anchors.
-  for (SmallVector<GlobalVariable *, 2>::iterator I = CUs.begin(),
-         E = CUs.end(); I != E; ++I)
-    (*I)->setLinkage(GlobalValue::InternalLinkage);
-  for (SmallVector<GlobalVariable *, 4>::iterator I = GVs.begin(),
-         E = GVs.end(); I != E; ++I)
-    (*I)->setLinkage(GlobalValue::InternalLinkage);
-  for (SmallVector<GlobalVariable *, 4>::iterator I = SPs.begin(),
-         E = SPs.end(); I != E; ++I)
-    (*I)->setLinkage(GlobalValue::InternalLinkage);
-
-
- // Delete all dbg variables.
-  for (Module::global_iterator I = M.global_begin(), E = M.global_end(); 
-       I != E; ++I) {
-    GlobalVariable *GV = dyn_cast<GlobalVariable>(I);
-    if (!GV) continue;
-    if (!GV->use_empty() && llvmUsedValues.count(I) == 0) {
-      if (strncmp(GV->getNameStart(), "llvm.dbg", 8) == 0) {
-        GV->replaceAllUsesWith(UndefValue::get(GV->getType()));
-      }
-    }
-  }
+static bool StripDebugInfo(Module &M) {
 
+  // Remove all of the calls to the debugger intrinsics, and remove them from
+  // the module.
   Function *FuncStart = M.getFunction("llvm.dbg.func.start");
   Function *StopPoint = M.getFunction("llvm.dbg.stoppoint");
   Function *RegionStart = M.getFunction("llvm.dbg.region.start");
   Function *RegionEnd = M.getFunction("llvm.dbg.region.end");
   Function *Declare = M.getFunction("llvm.dbg.declare");
 
-  std::vector<Constant*> DeadConstants;
-
-  // Remove all of the calls to the debugger intrinsics, and remove them from
-  // the module.
   if (FuncStart) {
     while (!FuncStart->use_empty()) {
       CallInst *CI = cast<CallInst>(FuncStart->use_back());
-      Value *Arg = CI->getOperand(1);
-      assert(CI->use_empty() && "llvm.dbg intrinsic should have void result");
       CI->eraseFromParent();
-      if (Arg->use_empty())
-        if (Constant *C = dyn_cast<Constant>(Arg)) 
-          DeadConstants.push_back(C);
     }
     FuncStart->eraseFromParent();
   }
   if (StopPoint) {
     while (!StopPoint->use_empty()) {
       CallInst *CI = cast<CallInst>(StopPoint->use_back());
-      Value *Arg = CI->getOperand(3);
-      assert(CI->use_empty() && "llvm.dbg intrinsic should have void result");
       CI->eraseFromParent();
-      if (Arg->use_empty())
-        if (Constant *C = dyn_cast<Constant>(Arg)) 
-          DeadConstants.push_back(C);
     }
     StopPoint->eraseFromParent();
   }
   if (RegionStart) {
     while (!RegionStart->use_empty()) {
       CallInst *CI = cast<CallInst>(RegionStart->use_back());
-      Value *Arg = CI->getOperand(1);
-      assert(CI->use_empty() && "llvm.dbg intrinsic should have void result");
       CI->eraseFromParent();
-      if (Arg->use_empty())
-        if (Constant *C = dyn_cast<Constant>(Arg)) 
-          DeadConstants.push_back(C);
     }
     RegionStart->eraseFromParent();
   }
   if (RegionEnd) {
     while (!RegionEnd->use_empty()) {
       CallInst *CI = cast<CallInst>(RegionEnd->use_back());
-      Value *Arg = CI->getOperand(1);
-      assert(CI->use_empty() && "llvm.dbg intrinsic should have void result");
       CI->eraseFromParent();
-      if (Arg->use_empty())
-        if (Constant *C = dyn_cast<Constant>(Arg)) 
-          DeadConstants.push_back(C);
     }
     RegionEnd->eraseFromParent();
   }
   if (Declare) {
     while (!Declare->use_empty()) {
       CallInst *CI = cast<CallInst>(Declare->use_back());
-      Value *Arg1 = CI->getOperand(1);
-      Value *Arg2 = CI->getOperand(2);
-      assert(CI->use_empty() && "llvm.dbg intrinsic should have void result");
       CI->eraseFromParent();
-      if (Arg1->use_empty()) {
-        if (Constant *C = dyn_cast<Constant>(Arg1)) 
-          DeadConstants.push_back(C);
-        else 
-          RecursivelyDeleteTriviallyDeadInstructions(Arg1);
-      }
-      if (Arg2->use_empty())
-        if (Constant *C = dyn_cast<Constant>(Arg2)) 
-          DeadConstants.push_back(C);
     }
     Declare->eraseFromParent();
   }
 
-  // llvm.dbg.compile_units and llvm.dbg.subprograms are marked as linkonce
-  // but since we are removing all debug information, make them internal now.
-  // FIXME: Use private linkage maybe?
-  if (Constant *C = M.getNamedGlobal("llvm.dbg.compile_units"))
-    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
-      GV->setLinkage(GlobalValue::InternalLinkage);
-
-  if (Constant *C = M.getNamedGlobal("llvm.dbg.subprograms"))
-    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
-      GV->setLinkage(GlobalValue::InternalLinkage);
- 
-  if (Constant *C = M.getNamedGlobal("llvm.dbg.global_variables"))
-    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
-      GV->setLinkage(GlobalValue::InternalLinkage);
-
-  // Delete all dbg variables.
-  for (Module::global_iterator I = M.global_begin(), E = M.global_end(); 
-       I != E; ++I) {
-    GlobalVariable *GV = dyn_cast<GlobalVariable>(I);
-    if (!GV) continue;
-    if (GV->use_empty() && llvmUsedValues.count(I) == 0
-        && (!GV->hasSection() 
-            || strcmp(GV->getSection().c_str(), "llvm.metadata") == 0))
-      DeadConstants.push_back(GV);
-  }
-
-  if (DeadConstants.empty())
-    return false;
+  NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv");
+  if (NMD)
+    NMD->eraseFromParent();
 
-  // Delete any internal globals that were only used by the debugger intrinsics.
-  while (!DeadConstants.empty()) {
-    Constant *C = DeadConstants.back();
-    DeadConstants.pop_back();
-    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
-      if (GV->hasLocalLinkage())
-        RemoveDeadConstant(GV);
-    }
-    else
-      RemoveDeadConstant(C);
-  }
-
-  // Remove all llvm.dbg types.
-  TypeSymbolTable &ST = M.getTypeSymbolTable();
-  for (TypeSymbolTable::iterator TI = ST.begin(), TE = ST.end(); TI != TE; ) {
-    if (!strncmp(TI->first.c_str(), "llvm.dbg.", 9))
-      ST.remove(TI++);
-    else 
-      ++TI;
-  }
-  
+  // Remove dead metadata.
+  M.getContext().RemoveDeadMetadata();
   return true;
 }
 
@@ -414,8 +297,7 @@ bool StripDebugDeclare::runOnModule(Module &M) {
        I != E; ++I) {
     GlobalVariable *GV = dyn_cast<GlobalVariable>(I);
     if (!GV) continue;
-    if (GV->use_empty() && GV->hasName() 
-        && strncmp(GV->getNameStart(), "llvm.dbg.global_variable", 24) == 0)
+    if (GV->use_empty() && GV->getName().startswith("llvm.dbg.global_variable"))
       DeadConstants.push_back(GV);
   }
 
diff --git a/lib/Transforms/IPO/StructRetPromotion.cpp b/lib/Transforms/IPO/StructRetPromotion.cpp
index 9f54388..4442820 100644
--- a/lib/Transforms/IPO/StructRetPromotion.cpp
+++ b/lib/Transforms/IPO/StructRetPromotion.cpp
@@ -23,6 +23,7 @@
 #include "llvm/Transforms/IPO.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "llvm/CallGraphSCCPass.h"
 #include "llvm/Instructions.h"
@@ -34,6 +35,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
 STATISTIC(NumRejectedSRETUses , "Number of sret rejected due to unexpected uses");
@@ -47,15 +49,15 @@ namespace {
       CallGraphSCCPass::getAnalysisUsage(AU);
     }
 
-    virtual bool runOnSCC(const std::vector<CallGraphNode *> &SCC);
+    virtual bool runOnSCC(std::vector<CallGraphNode *> &SCC);
     static char ID; // Pass identification, replacement for typeid
     SRETPromotion() : CallGraphSCCPass(&ID) {}
 
   private:
-    bool PromoteReturn(CallGraphNode *CGN);
+    CallGraphNode *PromoteReturn(CallGraphNode *CGN);
     bool isSafeToUpdateAllCallers(Function *F);
     Function *cloneFunctionBody(Function *F, const StructType *STy);
-    void updateCallSites(Function *F, Function *NF);
+    CallGraphNode *updateCallSites(Function *F, Function *NF);
     bool nestedStructType(const StructType *STy);
   };
 }
@@ -68,49 +70,54 @@ Pass *llvm::createStructRetPromotionPass() {
   return new SRETPromotion();
 }
 
-bool SRETPromotion::runOnSCC(const std::vector<CallGraphNode *> &SCC) {
+bool SRETPromotion::runOnSCC(std::vector<CallGraphNode *> &SCC) {
   bool Changed = false;
 
   for (unsigned i = 0, e = SCC.size(); i != e; ++i)
-    Changed |= PromoteReturn(SCC[i]);
+    if (CallGraphNode *NewNode = PromoteReturn(SCC[i])) {
+      SCC[i] = NewNode;
+      Changed = true;
+    }
 
   return Changed;
 }
 
 /// PromoteReturn - This method promotes function that uses StructRet paramater 
-/// into a function that uses mulitple return value.
-bool SRETPromotion::PromoteReturn(CallGraphNode *CGN) {
+/// into a function that uses multiple return values.
+CallGraphNode *SRETPromotion::PromoteReturn(CallGraphNode *CGN) {
   Function *F = CGN->getFunction();
 
   if (!F || F->isDeclaration() || !F->hasLocalLinkage())
-    return false;
+    return 0;
 
   // Make sure that function returns struct.
   if (F->arg_size() == 0 || !F->hasStructRetAttr() || F->doesNotReturn())
-    return false;
+    return 0;
 
-  DOUT << "SretPromotion: Looking at sret function " << F->getNameStart() << "\n";
+  DEBUG(errs() << "SretPromotion: Looking at sret function " 
+        << F->getName() << "\n");
 
-  assert (F->getReturnType() == Type::VoidTy && "Invalid function return type");
+  assert(F->getReturnType() == Type::getVoidTy(F->getContext()) &&
+         "Invalid function return type");
   Function::arg_iterator AI = F->arg_begin();
   const llvm::PointerType *FArgType = dyn_cast<PointerType>(AI->getType());
-  assert (FArgType && "Invalid sret parameter type");
+  assert(FArgType && "Invalid sret parameter type");
   const llvm::StructType *STy = 
     dyn_cast<StructType>(FArgType->getElementType());
-  assert (STy && "Invalid sret parameter element type");
+  assert(STy && "Invalid sret parameter element type");
 
   // Check if it is ok to perform this promotion.
   if (isSafeToUpdateAllCallers(F) == false) {
-    DOUT << "SretPromotion: Not all callers can be updated\n";
+    DEBUG(errs() << "SretPromotion: Not all callers can be updated\n");
     NumRejectedSRETUses++;
-    return false;
+    return 0;
   }
 
-  DOUT << "SretPromotion: sret argument will be promoted\n";
+  DEBUG(errs() << "SretPromotion: sret argument will be promoted\n");
   NumSRET++;
   // [1] Replace use of sret parameter 
-  AllocaInst *TheAlloca = new AllocaInst (STy, NULL, "mrv", 
-                                          F->getEntryBlock().begin());
+  AllocaInst *TheAlloca = new AllocaInst(STy, NULL, "mrv", 
+                                         F->getEntryBlock().begin());
   Value *NFirstArg = F->arg_begin();
   NFirstArg->replaceAllUsesWith(TheAlloca);
 
@@ -121,7 +128,7 @@ bool SRETPromotion::PromoteReturn(CallGraphNode *CGN) {
       ++BI;
       if (isa<ReturnInst>(I)) {
         Value *NV = new LoadInst(TheAlloca, "mrv.ld", I);
-        ReturnInst *NR = ReturnInst::Create(NV, I);
+        ReturnInst *NR = ReturnInst::Create(F->getContext(), NV, I);
         I->replaceAllUsesWith(NR);
         I->eraseFromParent();
       }
@@ -131,11 +138,13 @@ bool SRETPromotion::PromoteReturn(CallGraphNode *CGN) {
   Function *NF = cloneFunctionBody(F, STy);
 
   // [4] Update all call sites to use new function
-  updateCallSites(F, NF);
+  CallGraphNode *NF_CFN = updateCallSites(F, NF);
 
-  F->eraseFromParent();
-  getAnalysis<CallGraph>().changeFunction(F, NF);
-  return true;
+  CallGraph &CG = getAnalysis<CallGraph>();
+  NF_CFN->stealCalledFunctionsFrom(CG[F]);
+
+  delete CG.removeFunctionFromModule(F);
+  return NF_CFN;
 }
 
 // Check if it is ok to perform this promotion.
@@ -243,23 +252,26 @@ Function *SRETPromotion::cloneFunctionBody(Function *F,
   Function::arg_iterator NI = NF->arg_begin();
   ++I;
   while (I != E) {
-      I->replaceAllUsesWith(NI);
-      NI->takeName(I);
-      ++I;
-      ++NI;
+    I->replaceAllUsesWith(NI);
+    NI->takeName(I);
+    ++I;
+    ++NI;
   }
 
   return NF;
 }
 
 /// updateCallSites - Update all sites that call F to use NF.
-void SRETPromotion::updateCallSites(Function *F, Function *NF) {
+CallGraphNode *SRETPromotion::updateCallSites(Function *F, Function *NF) {
   CallGraph &CG = getAnalysis<CallGraph>();
   SmallVector<Value*, 16> Args;
 
   // Attributes - Keep track of the parameter attributes for the arguments.
   SmallVector<AttributeWithIndex, 8> ArgAttrsVec;
 
+  // Get a new callgraph node for NF.
+  CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF);
+
   while (!F->use_empty()) {
     CallSite CS = CallSite::get(*F->use_begin());
     Instruction *Call = CS.getInstruction();
@@ -309,8 +321,10 @@ void SRETPromotion::updateCallSites(Function *F, Function *NF) {
     New->takeName(Call);
 
     // Update the callgraph to know that the callsite has been transformed.
-    CG[Call->getParent()->getParent()]->replaceCallSite(Call, New);
-
+    CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()];
+    CalleeNode->removeCallEdgeFor(Call);
+    CalleeNode->addCalledFunction(New, NF_CGN);
+    
     // Update all users of sret parameter to extract value using extractvalue.
     for (Value::use_iterator UI = FirstCArg->use_begin(), 
            UE = FirstCArg->use_end(); UI != UE; ) {
@@ -318,24 +332,25 @@ void SRETPromotion::updateCallSites(Function *F, Function *NF) {
       CallInst *C2 = dyn_cast<CallInst>(U2);
       if (C2 && (C2 == Call))
         continue;
-      else if (GetElementPtrInst *UGEP = dyn_cast<GetElementPtrInst>(U2)) {
-        ConstantInt *Idx = dyn_cast<ConstantInt>(UGEP->getOperand(2));
-        assert (Idx && "Unexpected getelementptr index!");
-        Value *GR = ExtractValueInst::Create(New, Idx->getZExtValue(),
-                                             "evi", UGEP);
-        while(!UGEP->use_empty()) {
-          // isSafeToUpdateAllCallers has checked that all GEP uses are
-          // LoadInsts
-          LoadInst *L = cast<LoadInst>(*UGEP->use_begin());
-          L->replaceAllUsesWith(GR);
-          L->eraseFromParent();
-        }
-        UGEP->eraseFromParent();
+      
+      GetElementPtrInst *UGEP = cast<GetElementPtrInst>(U2);
+      ConstantInt *Idx = cast<ConstantInt>(UGEP->getOperand(2));
+      Value *GR = ExtractValueInst::Create(New, Idx->getZExtValue(),
+                                           "evi", UGEP);
+      while(!UGEP->use_empty()) {
+        // isSafeToUpdateAllCallers has checked that all GEP uses are
+        // LoadInsts
+        LoadInst *L = cast<LoadInst>(*UGEP->use_begin());
+        L->replaceAllUsesWith(GR);
+        L->eraseFromParent();
       }
-      else assert( 0 && "Unexpected sret parameter use");
+      UGEP->eraseFromParent();
+      continue;
     }
     Call->eraseFromParent();
   }
+  
+  return NF_CGN;
 }
 
 /// nestedStructType - Return true if STy includes any
@@ -344,7 +359,7 @@ bool SRETPromotion::nestedStructType(const StructType *STy) {
   unsigned Num = STy->getNumElements();
   for (unsigned i = 0; i < Num; i++) {
     const Type *Ty = STy->getElementType(i);
-    if (!Ty->isSingleValueType() && Ty != Type::VoidTy)
+    if (!Ty->isSingleValueType() && Ty != Type::getVoidTy(STy->getContext()))
       return true;
   }
   return false;
diff --git a/lib/Transforms/Instrumentation/BlockProfiling.cpp b/lib/Transforms/Instrumentation/BlockProfiling.cpp
index 2bd9809..eb8f225 100644
--- a/lib/Transforms/Instrumentation/BlockProfiling.cpp
+++ b/lib/Transforms/Instrumentation/BlockProfiling.cpp
@@ -19,12 +19,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Compiler.h"
-#include "llvm/Support/Streams.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Instrumentation.h"
 #include "RSProfiling.h"
 #include "ProfilingUtils.h"
@@ -52,8 +51,8 @@ ModulePass *llvm::createFunctionProfilerPass() {
 bool FunctionProfiler::runOnModule(Module &M) {
   Function *Main = M.getFunction("main");
   if (Main == 0) {
-    cerr << "WARNING: cannot insert function profiling into a module"
-         << " with no main function!\n";
+    errs() << "WARNING: cannot insert function profiling into a module"
+           << " with no main function!\n";
     return false;  // No main, no instrumentation!
   }
 
@@ -62,10 +61,11 @@ bool FunctionProfiler::runOnModule(Module &M) {
     if (!I->isDeclaration())
       ++NumFunctions;
 
-  const Type *ATy = ArrayType::get(Type::Int32Ty, NumFunctions);
+  const Type *ATy = ArrayType::get(Type::getInt32Ty(M.getContext()),
+                                   NumFunctions);
   GlobalVariable *Counters =
-    new GlobalVariable(ATy, false, GlobalValue::InternalLinkage,
-                       Constant::getNullValue(ATy), "FuncProfCounters", &M);
+    new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage,
+                       Constant::getNullValue(ATy), "FuncProfCounters");
 
   // Instrument all of the functions...
   unsigned i = 0;
@@ -98,26 +98,29 @@ ModulePass *llvm::createBlockProfilerPass() { return new BlockProfiler(); }
 bool BlockProfiler::runOnModule(Module &M) {
   Function *Main = M.getFunction("main");
   if (Main == 0) {
-    cerr << "WARNING: cannot insert block profiling into a module"
-         << " with no main function!\n";
+    errs() << "WARNING: cannot insert block profiling into a module"
+           << " with no main function!\n";
     return false;  // No main, no instrumentation!
   }
 
   unsigned NumBlocks = 0;
   for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
-    NumBlocks += I->size();
+    if (!I->isDeclaration())
+      NumBlocks += I->size();
 
-  const Type *ATy = ArrayType::get(Type::Int32Ty, NumBlocks);
+  const Type *ATy = ArrayType::get(Type::getInt32Ty(M.getContext()), NumBlocks);
   GlobalVariable *Counters =
-    new GlobalVariable(ATy, false, GlobalValue::InternalLinkage,
-                       Constant::getNullValue(ATy), "BlockProfCounters", &M);
+    new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage,
+                       Constant::getNullValue(ATy), "BlockProfCounters");
 
   // Instrument all of the blocks...
   unsigned i = 0;
-  for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+  for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+    if (I->isDeclaration()) continue;
     for (Function::iterator BB = I->begin(), E = I->end(); BB != E; ++BB)
       // Insert counter at the start of the block
       IncrementCounterInBlock(BB, i++, Counters);
+  }
 
   // Add the initialization call to main.
   InsertProfilingInitCall(Main, "llvm_start_block_profiling", Counters);
diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt
index d7c518d..494928e 100644
--- a/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -1,6 +1,7 @@
 add_llvm_library(LLVMInstrumentation
   BlockProfiling.cpp
   EdgeProfiling.cpp
+  OptimalEdgeProfiling.cpp
   ProfilingUtils.cpp
   RSProfiling.cpp
   )
diff --git a/lib/Transforms/Instrumentation/EdgeProfiling.cpp b/lib/Transforms/Instrumentation/EdgeProfiling.cpp
index 0831f3b..b9cb275 100644
--- a/lib/Transforms/Instrumentation/EdgeProfiling.cpp
+++ b/lib/Transforms/Instrumentation/EdgeProfiling.cpp
@@ -16,25 +16,30 @@
 // number of counters inserted.
 //
 //===----------------------------------------------------------------------===//
-
+#define DEBUG_TYPE "insert-edge-profiling"
 #include "ProfilingUtils.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Compiler.h"
-#include "llvm/Support/Streams.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Instrumentation.h"
+#include "llvm/ADT/Statistic.h"
 #include <set>
 using namespace llvm;
 
+STATISTIC(NumEdgesInserted, "The # of edges inserted.");
+
 namespace {
   class VISIBILITY_HIDDEN EdgeProfiler : public ModulePass {
     bool runOnModule(Module &M);
   public:
     static char ID; // Pass identification, replacement for typeid
     EdgeProfiler() : ModulePass(&ID) {}
+
+    virtual const char *getPassName() const {
+      return "Edge Profiler";
+    }
   };
 }
 
@@ -47,14 +52,17 @@ ModulePass *llvm::createEdgeProfilerPass() { return new EdgeProfiler(); }
 bool EdgeProfiler::runOnModule(Module &M) {
   Function *Main = M.getFunction("main");
   if (Main == 0) {
-    cerr << "WARNING: cannot insert edge profiling into a module"
-         << " with no main function!\n";
+    errs() << "WARNING: cannot insert edge profiling into a module"
+           << " with no main function!\n";
     return false;  // No main, no instrumentation!
   }
 
   std::set<BasicBlock*> BlocksToInstrument;
   unsigned NumEdges = 0;
-  for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
+  for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+    if (F->isDeclaration()) continue;
+    // Reserve space for (0,entry) edge.
+    ++NumEdges;
     for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
       // Keep track of which blocks need to be instrumented.  We don't want to
       // instrument blocks that are added as the result of breaking critical
@@ -62,15 +70,20 @@ bool EdgeProfiler::runOnModule(Module &M) {
       BlocksToInstrument.insert(BB);
       NumEdges += BB->getTerminator()->getNumSuccessors();
     }
+  }
 
-  const Type *ATy = ArrayType::get(Type::Int32Ty, NumEdges);
+  const Type *ATy = ArrayType::get(Type::getInt32Ty(M.getContext()), NumEdges);
   GlobalVariable *Counters =
-    new GlobalVariable(ATy, false, GlobalValue::InternalLinkage,
-                       Constant::getNullValue(ATy), "EdgeProfCounters", &M);
+    new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage,
+                       Constant::getNullValue(ATy), "EdgeProfCounters");
+  NumEdgesInserted = NumEdges;
 
   // Instrument all of the edges...
   unsigned i = 0;
-  for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
+  for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+    if (F->isDeclaration()) continue;
+    // Create counter for (0,entry) edge.
+    IncrementCounterInBlock(&F->getEntryBlock(), i++, Counters);
     for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
       if (BlocksToInstrument.count(BB)) {  // Don't instrument inserted blocks
         // Okay, we have to add a counter of each outgoing edge.  If the
@@ -93,6 +106,7 @@ bool EdgeProfiler::runOnModule(Module &M) {
           }
         }
       }
+  }
 
   // Add the initialization call to main.
   InsertProfilingInitCall(Main, "llvm_start_edge_profiling", Counters);
diff --git a/lib/Transforms/Instrumentation/MaximumSpanningTree.h b/lib/Transforms/Instrumentation/MaximumSpanningTree.h
new file mode 100644
index 0000000..2951dbc
--- /dev/null
+++ b/lib/Transforms/Instrumentation/MaximumSpanningTree.h
@@ -0,0 +1,95 @@
+//===- llvm/Analysis/MaximumSpanningTree.h - Interface ----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This module privides means for calculating a maximum spanning tree for a
+// given set of weighted edges. The type parameter T is the type of a node.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H
+#define LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H
+
+#include "llvm/ADT/EquivalenceClasses.h"
+#include <vector>
+#include <algorithm>
+
+namespace llvm {
+
+  /// MaximumSpanningTree - A MST implementation.
+  /// The type parameter T determines the type of the nodes of the graph.
+  template <typename T>
+  class MaximumSpanningTree {
+
+    // A comparing class for comparing weighted edges.
+    template <typename CT>
+    struct EdgeWeightCompare {
+      bool operator()(typename MaximumSpanningTree<CT>::EdgeWeight X, 
+                      typename MaximumSpanningTree<CT>::EdgeWeight Y) const {
+        if (X.second > Y.second) return true;
+        if (X.second < Y.second) return false;
+        return false;
+      }
+    };
+
+  public:
+    typedef std::pair<const T*, const T*> Edge;
+    typedef std::pair<Edge, double> EdgeWeight;
+    typedef std::vector<EdgeWeight> EdgeWeights;
+  protected:
+    typedef std::vector<Edge> MaxSpanTree;
+
+    MaxSpanTree MST;
+
+  public:
+    static char ID; // Class identification, replacement for typeinfo
+
+    /// MaximumSpanningTree() - Takes a vector of weighted edges and returns a
+    /// spanning tree.
+    MaximumSpanningTree(EdgeWeights &EdgeVector) {
+
+      std::stable_sort(EdgeVector.begin(), EdgeVector.end(), EdgeWeightCompare<T>());
+
+      // Create spanning tree, Forest contains a special data structure
+      // that makes checking if two nodes are already in a common (sub-)tree
+      // fast and cheap.
+      EquivalenceClasses<const T*> Forest;
+      for (typename EdgeWeights::iterator EWi = EdgeVector.begin(),
+           EWe = EdgeVector.end(); EWi != EWe; ++EWi) {
+        Edge e = (*EWi).first;
+
+        Forest.insert(e.first);
+        Forest.insert(e.second);
+      }
+
+      // Iterate over the sorted edges, biggest first.
+      for (typename EdgeWeights::iterator EWi = EdgeVector.begin(),
+           EWe = EdgeVector.end(); EWi != EWe; ++EWi) {
+        Edge e = (*EWi).first;
+
+        if (Forest.findLeader(e.first) != Forest.findLeader(e.second)) {
+          Forest.unionSets(e.first, e.second);
+          // So we know now that the edge is not already in a subtree, so we push
+          // the edge to the MST.
+          MST.push_back(e);
+        }
+      }
+    }
+
+    typename MaxSpanTree::iterator begin() {
+      return MST.begin();
+    }
+
+    typename MaxSpanTree::iterator end() {
+      return MST.end();
+    }
+  };
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
new file mode 100644
index 0000000..b2e6747
--- /dev/null
+++ b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
@@ -0,0 +1,219 @@
+//===- OptimalEdgeProfiling.cpp - Insert counters for opt. edge profiling -===//
+//
+//                      The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass instruments the specified program with counters for edge profiling.
+// Edge profiling can give a reasonable approximation of the hot paths through a
+// program, and is used for a wide variety of program transformations.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "insert-optimal-edge-profiling"
+#include "ProfilingUtils.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Analysis/ProfileInfoLoader.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "MaximumSpanningTree.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumEdgesInserted, "The # of edges inserted.");
+
+namespace {
+  class VISIBILITY_HIDDEN OptimalEdgeProfiler : public ModulePass {
+    bool runOnModule(Module &M);
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    OptimalEdgeProfiler() : ModulePass(&ID) {}
+
+    void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequiredID(ProfileEstimatorPassID);
+      AU.addRequired<ProfileInfo>();
+    }
+
+    virtual const char *getPassName() const {
+      return "Optimal Edge Profiler";
+    }
+  };
+}
+
+char OptimalEdgeProfiler::ID = 0;
+static RegisterPass<OptimalEdgeProfiler>
+X("insert-optimal-edge-profiling", 
+  "Insert optimal instrumentation for edge profiling");
+
+ModulePass *llvm::createOptimalEdgeProfilerPass() {
+  return new OptimalEdgeProfiler();
+}
+
+inline static void printEdgeCounter(ProfileInfo::Edge e,
+                                    BasicBlock* b,
+                                    unsigned i) {
+  DEBUG(errs() << "--Edge Counter for " << (e) << " in " \
+               << ((b)?(b)->getNameStr():"0") << " (# " << (i) << ")\n");
+}
+
+bool OptimalEdgeProfiler::runOnModule(Module &M) {
+  Function *Main = M.getFunction("main");
+  if (Main == 0) {
+    errs() << "WARNING: cannot insert edge profiling into a module"
+           << " with no main function!\n";
+    return false;  // No main, no instrumentation!
+  }
+
+  // NumEdges counts all the edges that may be instrumented. Later on its
+  // decided which edges to actually instrument, to achieve optimal profiling.
+  // For the entry block a virtual edge (0,entry) is reserved, for each block
+  // with no successors an edge (BB,0) is reserved. These edges are necessary
+  // to calculate a truly optimal maximum spanning tree and thus an optimal
+  // instrumentation.
+  unsigned NumEdges = 0;
+
+  for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+    if (F->isDeclaration()) continue;
+    // Reserve space for (0,entry) edge.
+    ++NumEdges;
+    for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+      // Keep track of which blocks need to be instrumented.  We don't want to
+      // instrument blocks that are added as the result of breaking critical
+      // edges!
+      if (BB->getTerminator()->getNumSuccessors() == 0) {
+        // Reserve space for (BB,0) edge.
+        ++NumEdges;
+      } else {
+        NumEdges += BB->getTerminator()->getNumSuccessors();
+      }
+    }
+  }
+
+  // In the profiling output a counter for each edge is reserved, but only few
+  // are used. This is done to be able to read back in the profile without
+  // calulating the maximum spanning tree again, instead each edge counter that
+  // is not used is initialised with -1 to signal that this edge counter has to
+  // be calculated from other edge counters on reading the profile info back
+  // in.
+
+  const Type *Int32 = Type::getInt32Ty(M.getContext());
+  const ArrayType *ATy = ArrayType::get(Int32, NumEdges);
+  GlobalVariable *Counters =
+    new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage,
+                       Constant::getNullValue(ATy), "OptEdgeProfCounters");
+  NumEdgesInserted = 0;
+
+  std::vector<Constant*> Initializer(NumEdges);
+  Constant* Zero = ConstantInt::get(Int32, 0);
+  Constant* Uncounted = ConstantInt::get(Int32, ProfileInfoLoader::Uncounted);
+
+  // Instrument all of the edges not in MST...
+  unsigned i = 0;
+  for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+    if (F->isDeclaration()) continue;
+    DEBUG(errs()<<"Working on "<<F->getNameStr()<<"\n");
+
+    // Calculate a Maximum Spanning Tree with the edge weights determined by
+    // ProfileEstimator. ProfileEstimator also assign weights to the virtual
+    // edges (0,entry) and (BB,0) (for blocks with no successors) and this
+    // edges also participate in the maximum spanning tree calculation. 
+    // The third parameter of MaximumSpanningTree() has the effect that not the
+    // actual MST is returned but the edges _not_ in the MST.
+
+    ProfileInfo::EdgeWeights ECs = 
+      getAnalysisID<ProfileInfo>(ProfileEstimatorPassID, *F).getEdgeWeights(F);
+    std::vector<ProfileInfo::EdgeWeight> EdgeVector(ECs.begin(), ECs.end());
+    MaximumSpanningTree<BasicBlock> MST (EdgeVector);
+    std::stable_sort(MST.begin(),MST.end());
+
+    // Check if (0,entry) not in the MST. If not, instrument edge
+    // (IncrementCounterInBlock()) and set the counter initially to zero, if
+    // the edge is in the MST the counter is initialised to -1.
+
+    BasicBlock *entry = &(F->getEntryBlock());
+    ProfileInfo::Edge edge = ProfileInfo::getEdge(0,entry);
+    if (!std::binary_search(MST.begin(), MST.end(), edge)) {
+      printEdgeCounter(edge,entry,i);
+      IncrementCounterInBlock(entry, i, Counters); NumEdgesInserted++;
+      Initializer[i++] = (Zero);
+    } else{
+      Initializer[i++] = (Uncounted);
+    }
+
+    // InsertedBlocks contains all blocks that were inserted for splitting an
+    // edge, this blocks do not have to be instrumented.
+    DenseSet<BasicBlock*> InsertedBlocks;
+    for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+      // Check if block was not inserted and thus does not have to be
+      // instrumented.
+      if (InsertedBlocks.count(BB)) continue;
+
+      // Okay, we have to add a counter of each outgoing edge not in MST. If
+      // the outgoing edge is not critical don't split it, just insert the
+      // counter in the source or destination of the edge. Also, if the block
+      // has no successors, the virtual edge (BB,0) is processed.
+      TerminatorInst *TI = BB->getTerminator();
+      if (TI->getNumSuccessors() == 0) {
+        ProfileInfo::Edge edge = ProfileInfo::getEdge(BB,0);
+        if (!std::binary_search(MST.begin(), MST.end(), edge)) {
+          printEdgeCounter(edge,BB,i);
+          IncrementCounterInBlock(BB, i, Counters); NumEdgesInserted++;
+          Initializer[i++] = (Zero);
+        } else{
+          Initializer[i++] = (Uncounted);
+        }
+      }
+      for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
+        BasicBlock *Succ = TI->getSuccessor(s);
+        ProfileInfo::Edge edge = ProfileInfo::getEdge(BB,Succ);
+        if (!std::binary_search(MST.begin(), MST.end(), edge)) {
+
+          // If the edge is critical, split it.
+          bool wasInserted = SplitCriticalEdge(TI, s, this);
+          Succ = TI->getSuccessor(s);
+          if (wasInserted)
+            InsertedBlocks.insert(Succ);
+
+          // Okay, we are guaranteed that the edge is no longer critical.  If
+          // we only have a single successor, insert the counter in this block,
+          // otherwise insert it in the successor block.
+          if (TI->getNumSuccessors() == 1) {
+            // Insert counter at the start of the block
+            printEdgeCounter(edge,BB,i);
+            IncrementCounterInBlock(BB, i, Counters); NumEdgesInserted++;
+          } else {
+            // Insert counter at the start of the block
+            printEdgeCounter(edge,Succ,i);
+            IncrementCounterInBlock(Succ, i, Counters); NumEdgesInserted++;
+          }
+          Initializer[i++] = (Zero);
+        } else {
+          Initializer[i++] = (Uncounted);
+        }
+      }
+    }
+  }
+
+  // Check if the number of edges counted at first was the number of edges we
+  // considered for instrumentation.
+  assert(i==NumEdges && "the number of edges in counting array is wrong");
+
+  // Assing the now completely defined initialiser to the array.
+  Constant *init = ConstantArray::get(ATy, Initializer);
+  Counters->setInitializer(init);
+
+  // Add the initialization call to main.
+  InsertProfilingInitCall(Main, "llvm_start_opt_edge_profiling", Counters);
+  return true;
+}
+
diff --git a/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/lib/Transforms/Instrumentation/ProfilingUtils.cpp
index 48071f1..1679bea 100644
--- a/lib/Transforms/Instrumentation/ProfilingUtils.cpp
+++ b/lib/Transforms/Instrumentation/ProfilingUtils.cpp
@@ -18,22 +18,27 @@
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 
 void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
                                    GlobalValue *Array) {
+  LLVMContext &Context = MainFn->getContext();
   const Type *ArgVTy = 
-    PointerType::getUnqual(PointerType::getUnqual(Type::Int8Ty));
-  const PointerType *UIntPtr = PointerType::getUnqual(Type::Int32Ty);
+    PointerType::getUnqual(Type::getInt8PtrTy(Context));
+  const PointerType *UIntPtr =
+        Type::getInt32PtrTy(Context);
   Module &M = *MainFn->getParent();
-  Constant *InitFn = M.getOrInsertFunction(FnName, Type::Int32Ty, Type::Int32Ty,
-                                           ArgVTy, UIntPtr, Type::Int32Ty,
+  Constant *InitFn = M.getOrInsertFunction(FnName, Type::getInt32Ty(Context),
+                                           Type::getInt32Ty(Context),
+                                           ArgVTy, UIntPtr,
+                                           Type::getInt32Ty(Context),
                                            (Type *)0);
 
   // This could force argc and argv into programs that wouldn't otherwise have
   // them, but instead we just pass null values in.
   std::vector<Value*> Args(4);
-  Args[0] = Constant::getNullValue(Type::Int32Ty);
+  Args[0] = Constant::getNullValue(Type::getInt32Ty(Context));
   Args[1] = Constant::getNullValue(ArgVTy);
 
   // Skip over any allocas in the entry block.
@@ -41,7 +46,8 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
   BasicBlock::iterator InsertPos = Entry->begin();
   while (isa<AllocaInst>(InsertPos)) ++InsertPos;
 
-  std::vector<Constant*> GEPIndices(2, Constant::getNullValue(Type::Int32Ty));
+  std::vector<Constant*> GEPIndices(2,
+                             Constant::getNullValue(Type::getInt32Ty(Context)));
   unsigned NumElements = 0;
   if (Array) {
     Args[2] = ConstantExpr::getGetElementPtr(Array, &GEPIndices[0],
@@ -53,7 +59,7 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
     // pass null.
     Args[2] = ConstantPointerNull::get(UIntPtr);
   }
-  Args[3] = ConstantInt::get(Type::Int32Ty, NumElements);
+  Args[3] = ConstantInt::get(Type::getInt32Ty(Context), NumElements);
 
   Instruction *InitCall = CallInst::Create(InitFn, Args.begin(), Args.end(),
                                            "newargc", InsertPos);
@@ -78,16 +84,18 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
     AI = MainFn->arg_begin();
     // If the program looked at argc, have it look at the return value of the
     // init call instead.
-    if (AI->getType() != Type::Int32Ty) {
+    if (AI->getType() != Type::getInt32Ty(Context)) {
       Instruction::CastOps opcode;
       if (!AI->use_empty()) {
         opcode = CastInst::getCastOpcode(InitCall, true, AI->getType(), true);
         AI->replaceAllUsesWith(
           CastInst::Create(opcode, InitCall, AI->getType(), "", InsertPos));
       }
-      opcode = CastInst::getCastOpcode(AI, true, Type::Int32Ty, true);
+      opcode = CastInst::getCastOpcode(AI, true,
+                                       Type::getInt32Ty(Context), true);
       InitCall->setOperand(1, 
-          CastInst::Create(opcode, AI, Type::Int32Ty, "argc.cast", InitCall));
+          CastInst::Create(opcode, AI, Type::getInt32Ty(Context),
+                           "argc.cast", InitCall));
     } else {
       AI->replaceAllUsesWith(InitCall);
       InitCall->setOperand(1, AI);
@@ -104,17 +112,20 @@ void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
   while (isa<AllocaInst>(InsertPos))
     ++InsertPos;
 
+  LLVMContext &Context = BB->getContext();
+
   // Create the getelementptr constant expression
   std::vector<Constant*> Indices(2);
-  Indices[0] = Constant::getNullValue(Type::Int32Ty);
-  Indices[1] = ConstantInt::get(Type::Int32Ty, CounterNum);
+  Indices[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+  Indices[1] = ConstantInt::get(Type::getInt32Ty(Context), CounterNum);
   Constant *ElementPtr = 
-    ConstantExpr::getGetElementPtr(CounterArray, &Indices[0], Indices.size());
+    ConstantExpr::getGetElementPtr(CounterArray, &Indices[0],
+                                          Indices.size());
 
   // Load, increment and store the value back.
   Value *OldVal = new LoadInst(ElementPtr, "OldFuncCounter", InsertPos);
   Value *NewVal = BinaryOperator::Create(Instruction::Add, OldVal,
-                                         ConstantInt::get(Type::Int32Ty, 1),
+                                 ConstantInt::get(Type::getInt32Ty(Context), 1),
                                          "NewFuncCounter", InsertPos);
   new StoreInst(NewVal, ElementPtr, InsertPos);
 }
diff --git a/lib/Transforms/Instrumentation/RSProfiling.cpp b/lib/Transforms/Instrumentation/RSProfiling.cpp
index b110f4e..3b72260 100644
--- a/lib/Transforms/Instrumentation/RSProfiling.cpp
+++ b/lib/Transforms/Instrumentation/RSProfiling.cpp
@@ -33,6 +33,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Pass.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "llvm/Instructions.h"
 #include "llvm/Constants.h"
@@ -43,6 +44,8 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Instrumentation.h"
 #include "RSProfiling.h"
 #include <set>
@@ -197,8 +200,8 @@ GlobalRandomCounter::GlobalRandomCounter(Module& M, const IntegerType* t,
                                          uint64_t resetval) : T(t) {
   ConstantInt* Init = ConstantInt::get(T, resetval); 
   ResetValue = Init;
-  Counter = new GlobalVariable(T, false, GlobalValue::InternalLinkage,
-                               Init, "RandomSteeringCounter", &M);
+  Counter = new GlobalVariable(M, T, false, GlobalValue::InternalLinkage,
+                               Init, "RandomSteeringCounter");
 }
 
 GlobalRandomCounter::~GlobalRandomCounter() {}
@@ -211,8 +214,9 @@ void GlobalRandomCounter::ProcessChoicePoint(BasicBlock* bb) {
   //decrement counter
   LoadInst* l = new LoadInst(Counter, "counter", t);
   
-  ICmpInst* s = new ICmpInst(ICmpInst::ICMP_EQ, l, ConstantInt::get(T, 0), 
-                             "countercc", t);
+  ICmpInst* s = new ICmpInst(t, ICmpInst::ICMP_EQ, l,
+                             ConstantInt::get(T, 0), 
+                             "countercc");
 
   Value* nv = BinaryOperator::CreateSub(l, ConstantInt::get(T, 1),
                                         "counternew", t);
@@ -221,7 +225,8 @@ void GlobalRandomCounter::ProcessChoicePoint(BasicBlock* bb) {
   
   //reset counter
   BasicBlock* oldnext = t->getSuccessor(0);
-  BasicBlock* resetblock = BasicBlock::Create("reset", oldnext->getParent(), 
+  BasicBlock* resetblock = BasicBlock::Create(bb->getContext(),
+                                              "reset", oldnext->getParent(), 
                                               oldnext);
   TerminatorInst* t2 = BranchInst::Create(oldnext, resetblock);
   t->setSuccessor(0, resetblock);
@@ -234,8 +239,8 @@ GlobalRandomCounterOpt::GlobalRandomCounterOpt(Module& M, const IntegerType* t,
   : AI(0), T(t) {
   ConstantInt* Init = ConstantInt::get(T, resetval);
   ResetValue  = Init;
-  Counter = new GlobalVariable(T, false, GlobalValue::InternalLinkage,
-                               Init, "RandomSteeringCounter", &M);
+  Counter = new GlobalVariable(M, T, false, GlobalValue::InternalLinkage,
+                               Init, "RandomSteeringCounter");
 }
 
 GlobalRandomCounterOpt::~GlobalRandomCounterOpt() {}
@@ -283,8 +288,9 @@ void GlobalRandomCounterOpt::ProcessChoicePoint(BasicBlock* bb) {
   //decrement counter
   LoadInst* l = new LoadInst(AI, "counter", t);
   
-  ICmpInst* s = new ICmpInst(ICmpInst::ICMP_EQ, l, ConstantInt::get(T, 0), 
-                             "countercc", t);
+  ICmpInst* s = new ICmpInst(t, ICmpInst::ICMP_EQ, l,
+                             ConstantInt::get(T, 0), 
+                             "countercc");
 
   Value* nv = BinaryOperator::CreateSub(l, ConstantInt::get(T, 1),
                                         "counternew", t);
@@ -293,7 +299,8 @@ void GlobalRandomCounterOpt::ProcessChoicePoint(BasicBlock* bb) {
   
   //reset counter
   BasicBlock* oldnext = t->getSuccessor(0);
-  BasicBlock* resetblock = BasicBlock::Create("reset", oldnext->getParent(), 
+  BasicBlock* resetblock = BasicBlock::Create(bb->getContext(),
+                                              "reset", oldnext->getParent(), 
                                               oldnext);
   TerminatorInst* t2 = BranchInst::Create(oldnext, resetblock);
   t->setSuccessor(0, resetblock);
@@ -315,12 +322,13 @@ void CycleCounter::ProcessChoicePoint(BasicBlock* bb) {
   
   CallInst* c = CallInst::Create(F, "rdcc", t);
   BinaryOperator* b = 
-    BinaryOperator::CreateAnd(c, ConstantInt::get(Type::Int64Ty, rm),
+    BinaryOperator::CreateAnd(c,
+                      ConstantInt::get(Type::getInt64Ty(bb->getContext()), rm),
                               "mrdcc", t);
   
-  ICmpInst *s = new ICmpInst(ICmpInst::ICMP_EQ, b,
-                             ConstantInt::get(Type::Int64Ty, 0), 
-                             "mrdccc", t);
+  ICmpInst *s = new ICmpInst(t, ICmpInst::ICMP_EQ, b,
+                        ConstantInt::get(Type::getInt64Ty(bb->getContext()), 0), 
+                             "mrdccc");
 
   t->setCondition(s);
 }
@@ -345,16 +353,16 @@ void RSProfilers_std::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNu
   
   // Create the getelementptr constant expression
   std::vector<Constant*> Indices(2);
-  Indices[0] = Constant::getNullValue(Type::Int32Ty);
-  Indices[1] = ConstantInt::get(Type::Int32Ty, CounterNum);
-  Constant *ElementPtr = ConstantExpr::getGetElementPtr(CounterArray,
+  Indices[0] = Constant::getNullValue(Type::getInt32Ty(BB->getContext()));
+  Indices[1] = ConstantInt::get(Type::getInt32Ty(BB->getContext()), CounterNum);
+  Constant *ElementPtr =ConstantExpr::getGetElementPtr(CounterArray,
                                                         &Indices[0], 2);
   
   // Load, increment and store the value back.
   Value *OldVal = new LoadInst(ElementPtr, "OldCounter", InsertPos);
   profcode.insert(OldVal);
   Value *NewVal = BinaryOperator::CreateAdd(OldVal,
-                                            ConstantInt::get(Type::Int32Ty, 1),
+                       ConstantInt::get(Type::getInt32Ty(BB->getContext()), 1),
                                             "NewCounter", InsertPos);
   profcode.insert(NewVal);
   profcode.insert(new StoreInst(NewVal, ElementPtr, InsertPos));
@@ -377,7 +385,8 @@ Value* ProfilerRS::Translate(Value* v) {
     if (bb == &bb->getParent()->getEntryBlock())
       TransCache[bb] = bb; //don't translate entry block
     else
-      TransCache[bb] = BasicBlock::Create("dup_" + bb->getName(),
+      TransCache[bb] = BasicBlock::Create(v->getContext(), 
+                                          "dup_" + bb->getName(),
                                           bb->getParent(), NULL);
     return TransCache[bb];
   } else if (Instruction* i = dyn_cast<Instruction>(v)) {
@@ -401,7 +410,7 @@ Value* ProfilerRS::Translate(Value* v) {
     TransCache[v] = v;
     return v;
   }
-  assert(0 && "Value not handled");
+  llvm_unreachable("Value not handled");
   return 0;
 }
 
@@ -466,16 +475,16 @@ void ProfilerRS::ProcessBackEdge(BasicBlock* src, BasicBlock* dst, Function& F)
   
   //a:
   Function::iterator BBN = src; ++BBN;
-  BasicBlock* bbC = BasicBlock::Create("choice", &F, BBN);
+  BasicBlock* bbC = BasicBlock::Create(F.getContext(), "choice", &F, BBN);
   //ChoicePoints.insert(bbC);
   BBN = cast<BasicBlock>(Translate(src));
-  BasicBlock* bbCp = BasicBlock::Create("choice", &F, ++BBN);
+  BasicBlock* bbCp = BasicBlock::Create(F.getContext(), "choice", &F, ++BBN);
   ChoicePoints.insert(bbCp);
   
   //b:
   BranchInst::Create(cast<BasicBlock>(Translate(dst)), bbC);
   BranchInst::Create(dst, cast<BasicBlock>(Translate(dst)), 
-                     ConstantInt::get(Type::Int1Ty, true), bbCp);
+              ConstantInt::get(Type::getInt1Ty(src->getContext()), true), bbCp);
   //c:
   {
     TerminatorInst* iB = src->getTerminator();
@@ -531,9 +540,8 @@ bool ProfilerRS::runOnFunction(Function& F) {
     TerminatorInst* T = F.getEntryBlock().getTerminator();
     ReplaceInstWithInst(T, BranchInst::Create(T->getSuccessor(0),
                                               cast<BasicBlock>(
-                                                Translate(T->getSuccessor(0))),
-                                              ConstantInt::get(Type::Int1Ty,
-                                                               true)));
+                   Translate(T->getSuccessor(0))),
+                      ConstantInt::get(Type::getInt1Ty(F.getContext()), true)));
     
     //do whatever is needed now that the function is duplicated
     c->PrepFunction(&F);
@@ -556,10 +564,12 @@ bool ProfilerRS::runOnFunction(Function& F) {
 bool ProfilerRS::doInitialization(Module &M) {
   switch (RandomMethod) {
   case GBV:
-    c = new GlobalRandomCounter(M, Type::Int32Ty, (1 << 14) - 1);
+    c = new GlobalRandomCounter(M, Type::getInt32Ty(M.getContext()),
+                                (1 << 14) - 1);
     break;
   case GBVO:
-    c = new GlobalRandomCounterOpt(M, Type::Int32Ty, (1 << 14) - 1);
+    c = new GlobalRandomCounterOpt(M, Type::getInt32Ty(M.getContext()),
+                                   (1 << 14) - 1);
     break;
   case HOSTCC:
     c = new CycleCounter(M, (1 << 14) - 1);
@@ -639,7 +649,7 @@ static void getBackEdges(Function& F, T& BackEdges) {
   std::map<BasicBlock*, int> finish;
   int time = 0;
   recBackEdge(&F.getEntryBlock(), BackEdges, color, depth, finish, time);
-  DOUT << F.getName() << " " << BackEdges.size() << "\n";
+  DEBUG(errs() << F.getName() << " " << BackEdges.size() << "\n");
 }
 
 
diff --git a/lib/Transforms/Makefile b/lib/Transforms/Makefile
index 5fe1eeb..025d02a 100644
--- a/lib/Transforms/Makefile
+++ b/lib/Transforms/Makefile
@@ -13,7 +13,7 @@ PARALLEL_DIRS = Utils Instrumentation Scalar IPO Hello
 include $(LEVEL)/Makefile.config
 
 # No support for plugins on windows targets
-ifeq ($(OS), $(filter $(OS), Cygwin MingW))
+ifeq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW))
   PARALLEL_DIRS := $(filter-out Hello, $(PARALLEL_DIRS))
 endif
 
diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp
index 9c55f66..37f383f 100644
--- a/lib/Transforms/Scalar/ADCE.cpp
+++ b/lib/Transforms/Scalar/ADCE.cpp
@@ -21,19 +21,17 @@
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CFG.h"
-#include "llvm/Support/Compiler.h"
 #include "llvm/Support/InstIterator.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
-
 using namespace llvm;
 
 STATISTIC(NumRemoved, "Number of instructions removed");
 
 namespace {
-  struct VISIBILITY_HIDDEN ADCE : public FunctionPass {
+  struct ADCE : public FunctionPass {
     static char ID; // Pass identification, replacement for typeid
     ADCE() : FunctionPass(&ID) {}
     
diff --git a/lib/Transforms/Scalar/BasicBlockPlacement.cpp b/lib/Transforms/Scalar/BasicBlockPlacement.cpp
index fb9b880..54533f5 100644
--- a/lib/Transforms/Scalar/BasicBlockPlacement.cpp
+++ b/lib/Transforms/Scalar/BasicBlockPlacement.cpp
@@ -31,7 +31,6 @@
 #include "llvm/Function.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CFG.h"
-#include "llvm/Support/Compiler.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Transforms/Scalar.h"
 #include <set>
@@ -40,7 +39,7 @@ using namespace llvm;
 STATISTIC(NumMoved, "Number of basic blocks moved");
 
 namespace {
-  struct VISIBILITY_HIDDEN BlockPlacement : public FunctionPass {
+  struct BlockPlacement : public FunctionPass {
     static char ID; // Pass identification, replacement for typeid
     BlockPlacement() : FunctionPass(&ID) {}
 
@@ -127,13 +126,13 @@ void BlockPlacement::PlaceBlocks(BasicBlock *BB) {
       /*empty*/;
     if (SI == E) return;  // No more successors to place.
 
-    unsigned MaxExecutionCount = PI->getExecutionCount(*SI);
+    double MaxExecutionCount = PI->getExecutionCount(*SI);
     BasicBlock *MaxSuccessor = *SI;
 
     // Scan for more frequently executed successors
     for (; SI != E; ++SI)
       if (!PlacedBlocks.count(*SI)) {
-        unsigned Count = PI->getExecutionCount(*SI);
+        double Count = PI->getExecutionCount(*SI);
         if (Count > MaxExecutionCount ||
             // Prefer to not disturb the code.
             (Count == MaxExecutionCount && *SI == &*InsertPos)) {
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index 8a8f83f..cbeed4c 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -1,13 +1,13 @@
 add_llvm_library(LLVMScalarOpts
   ADCE.cpp
   BasicBlockPlacement.cpp
+  CodeGenLICM.cpp
   CodeGenPrepare.cpp
   CondPropagate.cpp
   ConstantProp.cpp
   DCE.cpp
   DeadStoreElimination.cpp
   GVN.cpp
-  GVNPRE.cpp
   IndVarSimplify.cpp
   InstructionCombining.cpp
   JumpThreading.cpp
@@ -19,7 +19,6 @@ add_llvm_library(LLVMScalarOpts
   LoopUnroll.cpp
   LoopUnswitch.cpp
   MemCpyOptimizer.cpp
-  PredicateSimplifier.cpp
   Reassociate.cpp
   Reg2Mem.cpp
   SCCP.cpp
diff --git a/lib/Transforms/Scalar/CodeGenLICM.cpp b/lib/Transforms/Scalar/CodeGenLICM.cpp
new file mode 100644
index 0000000..10f950e
--- /dev/null
+++ b/lib/Transforms/Scalar/CodeGenLICM.cpp
@@ -0,0 +1,112 @@
+//===- CodeGenLICM.cpp - LICM a function for code generation --------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This function performs late LICM, hoisting constants out of loops that
+// are not valid immediates. It should not be followed by instcombine,
+// because instcombine would quickly stuff the constants back into the loop.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "codegen-licm"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/ADT/DenseMap.h"
+using namespace llvm;
+
+namespace {
+  class CodeGenLICM : public LoopPass {
+    virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    explicit CodeGenLICM() : LoopPass(&ID) {}
+  };
+}
+
+char CodeGenLICM::ID = 0;
+static RegisterPass<CodeGenLICM> X("codegen-licm",
+                                   "hoist constants out of loops");
+
+Pass *llvm::createCodeGenLICMPass() {
+  return new CodeGenLICM();
+}
+
+bool CodeGenLICM::runOnLoop(Loop *L, LPPassManager &) {
+  bool Changed = false;
+
+  // Only visit outermost loops.
+  if (L->getParentLoop()) return Changed;
+
+  Instruction *PreheaderTerm = L->getLoopPreheader()->getTerminator();
+  DenseMap<Constant *, BitCastInst *> HoistedConstants;
+
+  for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+       I != E; ++I) {
+    BasicBlock *BB = *I;
+    for (BasicBlock::iterator BBI = BB->begin(), BBE = BB->end();
+         BBI != BBE; ++BBI) {
+      Instruction *I = BBI;
+      // TODO: For now, skip all intrinsic instructions, because some of them
+      // can require their operands to be constants, and we don't want to
+      // break that.
+      if (isa<IntrinsicInst>(I))
+        continue;
+      // LLVM represents fneg as -0.0-x; don't hoist the -0.0 out.
+      if (BinaryOperator::isFNeg(I) ||
+          BinaryOperator::isNeg(I) ||
+          BinaryOperator::isNot(I))
+        continue;
+      for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+        // Don't hoist out switch case constants.
+        if (isa<SwitchInst>(I) && i == 1)
+          break;
+        // Don't hoist out shuffle masks.
+        if (isa<ShuffleVectorInst>(I) && i == 2)
+          break;
+        Value *Op = I->getOperand(i);
+        Constant *C = dyn_cast<Constant>(Op);
+        if (!C) continue;
+        // TODO: Ask the target which constants are legal. This would allow
+        // us to add support for hoisting ConstantInts and GlobalValues too.
+        if (isa<ConstantFP>(C) ||
+            isa<ConstantVector>(C) ||
+            isa<ConstantAggregateZero>(C)) {
+          BitCastInst *&BC = HoistedConstants[C];
+          if (!BC)
+            BC = new BitCastInst(C, C->getType(), "hoist", PreheaderTerm);
+          I->setOperand(i, BC);
+          Changed = true;
+        }
+      }
+    }
+  }
+
+  return Changed;
+}
+
+void CodeGenLICM::getAnalysisUsage(AnalysisUsage &AU) const {
+  // This pass preserves just about everything. List some popular things here.
+  AU.setPreservesCFG();
+  AU.addPreservedID(LoopSimplifyID);
+  AU.addPreserved<LoopInfo>();
+  AU.addPreserved<AliasAnalysis>();
+  AU.addPreserved("scalar-evolution");
+  AU.addPreserved("iv-users");
+  AU.addPreserved("lda");
+  AU.addPreserved("live-values");
+
+  // Hoisting requires a loop preheader.
+  AU.addRequiredID(LoopSimplifyID);
+}
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 85e9243..a3e3fea 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -23,10 +23,9 @@
 #include "llvm/IntrinsicInst.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Pass.h"
-#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Analysis/ProfileInfo.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
 #include "llvm/Transforms/Utils/AddrModeMatcher.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
@@ -35,10 +34,10 @@
 #include "llvm/Assembly/Writer.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/PatternMatch.h"
+#include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 using namespace llvm::PatternMatch;
 
@@ -46,10 +45,11 @@ static cl::opt<bool> FactorCommonPreds("split-critical-paths-tweak",
                                        cl::init(false), cl::Hidden);
 
 namespace {
-  class VISIBILITY_HIDDEN CodeGenPrepare : public FunctionPass {
+  class CodeGenPrepare : public FunctionPass {
     /// TLI - Keep a pointer of a TargetLowering to consult for determining
     /// transformation profitability.
     const TargetLowering *TLI;
+    ProfileInfo *PI;
 
     /// BackEdges - Keep a set of all the loop back edges.
     ///
@@ -60,6 +60,10 @@ namespace {
       : FunctionPass(&ID), TLI(tli) {}
     bool runOnFunction(Function &F);
 
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addPreserved<ProfileInfo>();
+    }
+
   private:
     bool EliminateMostlyEmptyBlocks(Function &F);
     bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
@@ -95,6 +99,7 @@ void CodeGenPrepare::findLoopBackEdges(const Function &F) {
 bool CodeGenPrepare::runOnFunction(Function &F) {
   bool EverMadeChange = false;
 
+  PI = getAnalysisIfAvailable<ProfileInfo>();
   // First pass, eliminate blocks that contain only PHI nodes and an
   // unconditional branch.
   EverMadeChange |= EliminateMostlyEmptyBlocks(F);
@@ -232,7 +237,7 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
   BranchInst *BI = cast<BranchInst>(BB->getTerminator());
   BasicBlock *DestBB = BI->getSuccessor(0);
 
-  DOUT << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB;
+  DEBUG(errs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB);
 
   // If the destination block has a single pred, then this is a trivial edge,
   // just collapse it.
@@ -241,12 +246,12 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
       // Remember if SinglePred was the entry block of the function.  If so, we
       // will need to move BB back to the entry position.
       bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
-      MergeBasicBlockIntoOnlyPred(DestBB);
+      MergeBasicBlockIntoOnlyPred(DestBB, this);
 
       if (isEntry && BB != &BB->getParent()->getEntryBlock())
         BB->moveBefore(&BB->getParent()->getEntryBlock());
       
-      DOUT << "AFTER:\n" << *DestBB << "\n\n\n";
+      DEBUG(errs() << "AFTER:\n" << *DestBB << "\n\n\n");
       return;
     }
   }
@@ -283,9 +288,13 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
   // The PHIs are now updated, change everything that refers to BB to use
   // DestBB and remove BB.
   BB->replaceAllUsesWith(DestBB);
+  if (PI) {
+    PI->replaceAllUses(BB, DestBB);
+    PI->removeEdge(ProfileInfo::getEdge(BB, DestBB));
+  }
   BB->eraseFromParent();
 
-  DOUT << "AFTER:\n" << *DestBB << "\n\n\n";
+  DEBUG(errs() << "AFTER:\n" << *DestBB << "\n\n\n");
 }
 
 
@@ -358,6 +367,9 @@ static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum,
 
       // If we found a workable predecessor, change TI to branch to Succ.
       if (FoundMatch) {
+        ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>();
+        if (PI)
+          PI->splitEdge(TIBB, Dest, Pred);
         Dest->removePredecessor(TIBB);
         TI->setSuccessor(SuccNum, Pred);
         return;
@@ -410,8 +422,8 @@ static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum,
 ///
 static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
   // If this is a noop copy,
-  MVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType());
-  MVT DstVT = TLI.getValueType(CI->getType());
+  EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType());
+  EVT DstVT = TLI.getValueType(CI->getType());
 
   // This is an fp<->int conversion?
   if (SrcVT.isInteger() != DstVT.isInteger())
@@ -424,10 +436,10 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
   // If these values will be promoted, find out what they will be promoted
   // to.  This helps us consider truncates on PPC as noop copies when they
   // are.
-  if (TLI.getTypeAction(SrcVT) == TargetLowering::Promote)
-    SrcVT = TLI.getTypeToTransformTo(SrcVT);
-  if (TLI.getTypeAction(DstVT) == TargetLowering::Promote)
-    DstVT = TLI.getTypeToTransformTo(DstVT);
+  if (TLI.getTypeAction(CI->getContext(), SrcVT) == TargetLowering::Promote)
+    SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
+  if (TLI.getTypeAction(CI->getContext(), DstVT) == TargetLowering::Promote)
+    DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
 
   // If, after promotion, these are the same types, this is a noop copy.
   if (SrcVT != DstVT)
@@ -520,7 +532,8 @@ static bool OptimizeCmpExpression(CmpInst *CI) {
       BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();
 
       InsertedCmp =
-        CmpInst::Create(CI->getOpcode(), CI->getPredicate(), CI->getOperand(0),
+        CmpInst::Create(CI->getOpcode(),
+                        CI->getPredicate(),  CI->getOperand(0),
                         CI->getOperand(1), "", InsertPt);
       MadeChange = true;
     }
@@ -577,7 +590,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
 
   // If all the instructions matched are already in this BB, don't do anything.
   if (!AnyNonLocal) {
-    DEBUG(cerr << "CGP: Found      local addrmode: " << AddrMode << "\n");
+    DEBUG(errs() << "CGP: Found      local addrmode: " << AddrMode << "\n");
     return false;
   }
 
@@ -592,14 +605,15 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
   // computation.
   Value *&SunkAddr = SunkAddrs[Addr];
   if (SunkAddr) {
-    DEBUG(cerr << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
-               << *MemoryInst);
+    DEBUG(errs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
+                 << *MemoryInst);
     if (SunkAddr->getType() != Addr->getType())
       SunkAddr = new BitCastInst(SunkAddr, Addr->getType(), "tmp", InsertPt);
   } else {
-    DEBUG(cerr << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
-               << *MemoryInst);
-    const Type *IntPtrTy = TLI->getTargetData()->getIntPtrType();
+    DEBUG(errs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
+                 << *MemoryInst);
+    const Type *IntPtrTy =
+          TLI->getTargetData()->getIntPtrType(AccessTy->getContext());
 
     Value *Result = 0;
     // Start with the scale value.
@@ -616,7 +630,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
         V = new SExtInst(V, IntPtrTy, "sunkaddr", InsertPt);
       }
       if (AddrMode.Scale != 1)
-        V = BinaryOperator::CreateMul(V, Context->getConstantInt(IntPtrTy,
+        V = BinaryOperator::CreateMul(V, ConstantInt::get(IntPtrTy,
                                                                 AddrMode.Scale),
                                       "sunkaddr", InsertPt);
       Result = V;
@@ -648,7 +662,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
 
     // Add in the Base Offset if present.
     if (AddrMode.BaseOffs) {
-      Value *V = Context->getConstantInt(IntPtrTy, AddrMode.BaseOffs);
+      Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
       if (Result)
         Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
       else
@@ -656,7 +670,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
     }
 
     if (Result == 0)
-      SunkAddr = Context->getNullValue(Addr->getType());
+      SunkAddr = Constant::getNullValue(Addr->getType());
     else
       SunkAddr = new IntToPtrInst(Result, Addr->getType(), "sunkaddr",InsertPt);
   }
@@ -858,18 +872,16 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
     } else if (CallInst *CI = dyn_cast<CallInst>(I)) {
       // If we found an inline asm expession, and if the target knows how to
       // lower it to normal LLVM code, do so now.
-      if (TLI && isa<InlineAsm>(CI->getCalledValue()))
-        if (const TargetAsmInfo *TAI =
-            TLI->getTargetMachine().getTargetAsmInfo()) {
-          if (TAI->ExpandInlineAsm(CI)) {
-            BBI = BB.begin();
-            // Avoid processing instructions out of order, which could cause
-            // reuse before a value is defined.
-            SunkAddrs.clear();
-          } else
-            // Sink address computing for memory operands into the block.
-            MadeChange |= OptimizeInlineAsmInst(I, &(*CI), SunkAddrs);
-        }
+      if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
+        if (TLI->ExpandInlineAsm(CI)) {
+          BBI = BB.begin();
+          // Avoid processing instructions out of order, which could cause
+          // reuse before a value is defined.
+          SunkAddrs.clear();
+        } else
+          // Sink address computing for memory operands into the block.
+          MadeChange |= OptimizeInlineAsmInst(I, &(*CI), SunkAddrs);
+      }
     }
   }
 
diff --git a/lib/Transforms/Scalar/CondPropagate.cpp b/lib/Transforms/Scalar/CondPropagate.cpp
index c85d031..5b573f4 100644
--- a/lib/Transforms/Scalar/CondPropagate.cpp
+++ b/lib/Transforms/Scalar/CondPropagate.cpp
@@ -14,26 +14,21 @@
 
 #define DEBUG_TYPE "condprop"
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Pass.h"
 #include "llvm/Type.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Streams.h"
 using namespace llvm;
 
 STATISTIC(NumBrThread, "Number of CFG edges threaded through branches");
 STATISTIC(NumSwThread, "Number of CFG edges threaded through switches");
 
 namespace {
-  struct VISIBILITY_HIDDEN CondProp : public FunctionPass {
+  struct CondProp : public FunctionPass {
     static char ID; // Pass identification, replacement for typeid
     CondProp() : FunctionPass(&ID) {}
 
@@ -124,7 +119,7 @@ void CondProp::SimplifyBlock(BasicBlock *BB) {
       // Succ is now dead, but we cannot delete it without potentially
       // invalidating iterators elsewhere.  Just insert an unreachable
       // instruction in it and delete this block later on.
-      new UnreachableInst(Succ);
+      new UnreachableInst(BB->getContext(), Succ);
       DeadBlocks.push_back(Succ);
       MadeChange = true;
     }
@@ -196,8 +191,6 @@ void CondProp::SimplifyPredecessors(SwitchInst *SI) {
   if (&*BBI != SI)
     return;
 
-  bool RemovedPreds = false;
-
   // Ok, we have this really simple case, walk the PHI operands, looking for
   // constants.  Walk from the end to remove operands from the end when
   // possible, and to avoid invalidating "i".
@@ -209,7 +202,6 @@ void CondProp::SimplifyPredecessors(SwitchInst *SI) {
       RevectorBlockTo(PN->getIncomingBlock(i-1),
                       SI->getSuccessor(DestCase));
       ++NumSwThread;
-      RemovedPreds = true;
 
       // If there were two predecessors before this simplification, or if the
       // PHI node contained all the same value except for the one we just
diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp
index b933488..4fee327 100644
--- a/lib/Transforms/Scalar/ConstantProp.cpp
+++ b/lib/Transforms/Scalar/ConstantProp.cpp
@@ -24,7 +24,6 @@
 #include "llvm/Constant.h"
 #include "llvm/Instruction.h"
 #include "llvm/Pass.h"
-#include "llvm/Support/Compiler.h"
 #include "llvm/Support/InstIterator.h"
 #include "llvm/ADT/Statistic.h"
 #include <set>
@@ -33,7 +32,7 @@ using namespace llvm;
 STATISTIC(NumInstKilled, "Number of instructions killed");
 
 namespace {
-  struct VISIBILITY_HIDDEN ConstantPropagation : public FunctionPass {
+  struct ConstantPropagation : public FunctionPass {
     static char ID; // Pass identification, replacement for typeid
     ConstantPropagation() : FunctionPass(&ID) {}
 
@@ -67,7 +66,7 @@ bool ConstantPropagation::runOnFunction(Function &F) {
     WorkList.erase(WorkList.begin());    // Get an element from the worklist...
 
     if (!I->use_empty())                 // Don't muck with dead instructions...
-      if (Constant *C = ConstantFoldInstruction(I)) {
+      if (Constant *C = ConstantFoldInstruction(I, F.getContext())) {
         // Add all of the users of this instruction to the worklist, they might
         // be constant propagatable now...
         for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp
index 8bb504c..39940c3 100644
--- a/lib/Transforms/Scalar/DCE.cpp
+++ b/lib/Transforms/Scalar/DCE.cpp
@@ -21,7 +21,6 @@
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Instruction.h"
 #include "llvm/Pass.h"
-#include "llvm/Support/Compiler.h"
 #include "llvm/Support/InstIterator.h"
 #include "llvm/ADT/Statistic.h"
 #include <set>
@@ -34,7 +33,7 @@ namespace {
   //===--------------------------------------------------------------------===//
   // DeadInstElimination pass implementation
   //
-  struct VISIBILITY_HIDDEN DeadInstElimination : public BasicBlockPass {
+  struct DeadInstElimination : public BasicBlockPass {
     static char ID; // Pass identification, replacement for typeid
     DeadInstElimination() : BasicBlockPass(&ID) {}
     virtual bool runOnBasicBlock(BasicBlock &BB) {
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index b923c92..a7b3e75 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -29,14 +29,15 @@
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Support/Compiler.h"
 using namespace llvm;
 
 STATISTIC(NumFastStores, "Number of stores deleted");
 STATISTIC(NumFastOther , "Number of other instrs removed");
 
 namespace {
-  struct VISIBILITY_HIDDEN DSE : public FunctionPass {
+  struct DSE : public FunctionPass {
+    TargetData *TD;
+
     static char ID; // Pass identification, replacement for typeid
     DSE() : FunctionPass(&ID) {}
 
@@ -62,7 +63,6 @@ namespace {
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.setPreservesCFG();
       AU.addRequired<DominatorTree>();
-      AU.addRequired<TargetData>();
       AU.addRequired<AliasAnalysis>();
       AU.addRequired<MemoryDependenceAnalysis>();
       AU.addPreserved<DominatorTree>();
@@ -79,15 +79,15 @@ FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); }
 
 bool DSE::runOnBasicBlock(BasicBlock &BB) {
   MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
-  TargetData &TD = getAnalysis<TargetData>();  
+  TD = getAnalysisIfAvailable<TargetData>();
 
   bool MadeChange = false;
   
-  // Do a top-down walk on the BB
+  // Do a top-down walk on the BB.
   for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) {
     Instruction *Inst = BBI++;
     
-    // If we find a store or a free, get it's memory dependence.
+    // If we find a store or a free, get its memory dependence.
     if (!isa<StoreInst>(Inst) && !isa<FreeInst>(Inst))
       continue;
     
@@ -117,13 +117,17 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
     // If this is a store-store dependence, then the previous store is dead so
     // long as this store is at least as big as it.
     if (StoreInst *DepStore = dyn_cast<StoreInst>(InstDep.getInst()))
-      if (TD.getTypeStoreSize(DepStore->getOperand(0)->getType()) <=
-          TD.getTypeStoreSize(SI->getOperand(0)->getType())) {
+      if (TD &&
+          TD->getTypeStoreSize(DepStore->getOperand(0)->getType()) <=
+          TD->getTypeStoreSize(SI->getOperand(0)->getType())) {
         // Delete the store and now-dead instructions that feed it.
         DeleteDeadInstruction(DepStore);
         NumFastStores++;
         MadeChange = true;
-        
+
+        // DeleteDeadInstruction can delete the current instruction in loop
+        // cases, reset BBI.
+        BBI = Inst;
         if (BBI != BB.begin())
           --BBI;
         continue;
@@ -134,8 +138,15 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
     if (LoadInst *DepLoad = dyn_cast<LoadInst>(InstDep.getInst())) {
       if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
           SI->getOperand(0) == DepLoad) {
+        // DeleteDeadInstruction can delete the current instruction.  Save BBI
+        // in case we need it.
+        WeakVH NextInst(BBI);
+        
         DeleteDeadInstruction(SI);
-        if (BBI != BB.begin())
+        
+        if (NextInst == 0)  // Next instruction deleted.
+          BBI = BB.begin();
+        else if (BBI != BB.begin())  // Revisit this instruction if possible.
           --BBI;
         NumFastStores++;
         MadeChange = true;
@@ -181,7 +192,6 @@ bool DSE::handleFreeWithNonTrivialDependency(FreeInst *F, MemDepResult Dep) {
 /// store i32 1, i32* %A
 /// ret void
 bool DSE::handleEndBlock(BasicBlock &BB) {
-  TargetData &TD = getAnalysis<TargetData>();
   AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
   
   bool MadeChange = false;
@@ -302,14 +312,16 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
 
         // Get size information for the alloca
         unsigned pointerSize = ~0U;
-        if (AllocaInst* A = dyn_cast<AllocaInst>(*I)) {
-          if (ConstantInt* C = dyn_cast<ConstantInt>(A->getArraySize()))
-            pointerSize = C->getZExtValue() *
-                          TD.getTypeAllocSize(A->getAllocatedType());
-        } else {
-          const PointerType* PT = cast<PointerType>(
-                                                 cast<Argument>(*I)->getType());
-          pointerSize = TD.getTypeAllocSize(PT->getElementType());
+        if (TD) {
+          if (AllocaInst* A = dyn_cast<AllocaInst>(*I)) {
+            if (ConstantInt* C = dyn_cast<ConstantInt>(A->getArraySize()))
+              pointerSize = C->getZExtValue() *
+                            TD->getTypeAllocSize(A->getAllocatedType());
+          } else {
+            const PointerType* PT = cast<PointerType>(
+                                                   cast<Argument>(*I)->getType());
+            pointerSize = TD->getTypeAllocSize(PT->getElementType());
+          }
         }
 
         // See if the call site touches it
@@ -357,7 +369,6 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
 bool DSE::RemoveUndeadPointers(Value* killPointer, uint64_t killPointerSize,
                                BasicBlock::iterator &BBI,
                                SmallPtrSet<Value*, 64>& deadPointers) {
-  TargetData &TD = getAnalysis<TargetData>();
   AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
                                   
   // If the kill pointer can be easily reduced to an alloca,
@@ -379,13 +390,15 @@ bool DSE::RemoveUndeadPointers(Value* killPointer, uint64_t killPointerSize,
       E = deadPointers.end(); I != E; ++I) {
     // Get size information for the alloca.
     unsigned pointerSize = ~0U;
-    if (AllocaInst* A = dyn_cast<AllocaInst>(*I)) {
-      if (ConstantInt* C = dyn_cast<ConstantInt>(A->getArraySize()))
-        pointerSize = C->getZExtValue() *
-                      TD.getTypeAllocSize(A->getAllocatedType());
-    } else {
-      const PointerType* PT = cast<PointerType>(cast<Argument>(*I)->getType());
-      pointerSize = TD.getTypeAllocSize(PT->getElementType());
+    if (TD) {
+      if (AllocaInst* A = dyn_cast<AllocaInst>(*I)) {
+        if (ConstantInt* C = dyn_cast<ConstantInt>(A->getArraySize()))
+          pointerSize = C->getZExtValue() *
+                        TD->getTypeAllocSize(A->getAllocatedType());
+      } else {
+        const PointerType* PT = cast<PointerType>(cast<Argument>(*I)->getType());
+        pointerSize = TD->getTypeAllocSize(PT->getElementType());
+      }
     }
 
     // See if this pointer could alias it
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index f4fe15e..2ed4a63 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -23,6 +23,7 @@
 #include "llvm/Function.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/LLVMContext.h"
+#include "llvm/Operator.h"
 #include "llvm/Value.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DepthFirstIterator.h"
@@ -32,13 +33,18 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/MallocHelper.h"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
 #include <cstdio>
 using namespace llvm;
 
@@ -60,17 +66,17 @@ static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
 /// as an efficient mechanism to determine the expression-wise equivalence of
 /// two values.
 namespace {
-  struct VISIBILITY_HIDDEN Expression {
+  struct Expression {
     enum ExpressionOpcode { ADD, FADD, SUB, FSUB, MUL, FMUL,
                             UDIV, SDIV, FDIV, UREM, SREM,
-                            FREM, SHL, LSHR, ASHR, AND, OR, XOR, ICMPEQ, 
-                            ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE, 
-                            ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ, 
-                            FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE, 
-                            FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE, 
+                            FREM, SHL, LSHR, ASHR, AND, OR, XOR, ICMPEQ,
+                            ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE,
+                            ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ,
+                            FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE,
+                            FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE,
                             FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT,
                             SHUFFLE, SELECT, TRUNC, ZEXT, SEXT, FPTOUI,
-                            FPTOSI, UITOFP, SITOFP, FPTRUNC, FPEXT, 
+                            FPTOSI, UITOFP, SITOFP, FPTRUNC, FPEXT,
                             PTRTOINT, INTTOPTR, BITCAST, GEP, CALL, CONSTANT,
                             EMPTY, TOMBSTONE };
 
@@ -80,11 +86,11 @@ namespace {
     uint32_t secondVN;
     uint32_t thirdVN;
     SmallVector<uint32_t, 4> varargs;
-    Value* function;
-  
+    Value *function;
+
     Expression() { }
     Expression(ExpressionOpcode o) : opcode(o) { }
-  
+
     bool operator==(const Expression &other) const {
       if (opcode != other.opcode)
         return false;
@@ -103,30 +109,30 @@ namespace {
       else {
         if (varargs.size() != other.varargs.size())
           return false;
-      
+
         for (size_t i = 0; i < varargs.size(); ++i)
           if (varargs[i] != other.varargs[i])
             return false;
-    
+
         return true;
       }
     }
-  
+
     bool operator!=(const Expression &other) const {
       return !(*this == other);
     }
   };
-  
-  class VISIBILITY_HIDDEN ValueTable {
+
+  class ValueTable {
     private:
       DenseMap<Value*, uint32_t> valueNumbering;
       DenseMap<Expression, uint32_t> expressionNumbering;
       AliasAnalysis* AA;
       MemoryDependenceAnalysis* MD;
       DominatorTree* DT;
-  
+
       uint32_t nextValueNumber;
-    
+
       Expression::ExpressionOpcode getOpcode(BinaryOperator* BO);
       Expression::ExpressionOpcode getOpcode(CmpInst* C);
       Expression::ExpressionOpcode getOpcode(CastInst* C);
@@ -142,11 +148,11 @@ namespace {
       Expression create_expression(Constant* C);
     public:
       ValueTable() : nextValueNumber(1) { }
-      uint32_t lookup_or_add(Value* V);
-      uint32_t lookup(Value* V) const;
-      void add(Value* V, uint32_t num);
+      uint32_t lookup_or_add(Value *V);
+      uint32_t lookup(Value *V) const;
+      void add(Value *V, uint32_t num);
       void clear();
-      void erase(Value* v);
+      void erase(Value *v);
       unsigned size();
       void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
       AliasAnalysis *getAliasAnalysis() const { return AA; }
@@ -162,30 +168,30 @@ template <> struct DenseMapInfo<Expression> {
   static inline Expression getEmptyKey() {
     return Expression(Expression::EMPTY);
   }
-  
+
   static inline Expression getTombstoneKey() {
     return Expression(Expression::TOMBSTONE);
   }
-  
+
   static unsigned getHashValue(const Expression e) {
     unsigned hash = e.opcode;
-    
+
     hash = e.firstVN + hash * 37;
     hash = e.secondVN + hash * 37;
     hash = e.thirdVN + hash * 37;
-    
+
     hash = ((unsigned)((uintptr_t)e.type >> 4) ^
             (unsigned)((uintptr_t)e.type >> 9)) +
            hash * 37;
-    
+
     for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(),
          E = e.varargs.end(); I != E; ++I)
       hash = *I + hash * 37;
-    
+
     hash = ((unsigned)((uintptr_t)e.function >> 4) ^
             (unsigned)((uintptr_t)e.function >> 9)) +
            hash * 37;
-    
+
     return hash;
   }
   static bool isEqual(const Expression &LHS, const Expression &RHS) {
@@ -201,7 +207,7 @@ template <> struct DenseMapInfo<Expression> {
 Expression::ExpressionOpcode ValueTable::getOpcode(BinaryOperator* BO) {
   switch(BO->getOpcode()) {
   default: // THIS SHOULD NEVER HAPPEN
-    assert(0 && "Binary operator with unknown opcode?");
+    llvm_unreachable("Binary operator with unknown opcode?");
   case Instruction::Add:  return Expression::ADD;
   case Instruction::FAdd: return Expression::FADD;
   case Instruction::Sub:  return Expression::SUB;
@@ -224,10 +230,10 @@ Expression::ExpressionOpcode ValueTable::getOpcode(BinaryOperator* BO) {
 }
 
 Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) {
-  if (isa<ICmpInst>(C) || isa<VICmpInst>(C)) {
+  if (isa<ICmpInst>(C)) {
     switch (C->getPredicate()) {
     default:  // THIS SHOULD NEVER HAPPEN
-      assert(0 && "Comparison with unknown predicate?");
+      llvm_unreachable("Comparison with unknown predicate?");
     case ICmpInst::ICMP_EQ:  return Expression::ICMPEQ;
     case ICmpInst::ICMP_NE:  return Expression::ICMPNE;
     case ICmpInst::ICMP_UGT: return Expression::ICMPUGT;
@@ -239,32 +245,32 @@ Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) {
     case ICmpInst::ICMP_SLT: return Expression::ICMPSLT;
     case ICmpInst::ICMP_SLE: return Expression::ICMPSLE;
     }
-  }
-  assert((isa<FCmpInst>(C) || isa<VFCmpInst>(C)) && "Unknown compare");
-  switch (C->getPredicate()) {
-  default: // THIS SHOULD NEVER HAPPEN
-    assert(0 && "Comparison with unknown predicate?");
-  case FCmpInst::FCMP_OEQ: return Expression::FCMPOEQ;
-  case FCmpInst::FCMP_OGT: return Expression::FCMPOGT;
-  case FCmpInst::FCMP_OGE: return Expression::FCMPOGE;
-  case FCmpInst::FCMP_OLT: return Expression::FCMPOLT;
-  case FCmpInst::FCMP_OLE: return Expression::FCMPOLE;
-  case FCmpInst::FCMP_ONE: return Expression::FCMPONE;
-  case FCmpInst::FCMP_ORD: return Expression::FCMPORD;
-  case FCmpInst::FCMP_UNO: return Expression::FCMPUNO;
-  case FCmpInst::FCMP_UEQ: return Expression::FCMPUEQ;
-  case FCmpInst::FCMP_UGT: return Expression::FCMPUGT;
-  case FCmpInst::FCMP_UGE: return Expression::FCMPUGE;
-  case FCmpInst::FCMP_ULT: return Expression::FCMPULT;
-  case FCmpInst::FCMP_ULE: return Expression::FCMPULE;
-  case FCmpInst::FCMP_UNE: return Expression::FCMPUNE;
+  } else {
+    switch (C->getPredicate()) {
+    default: // THIS SHOULD NEVER HAPPEN
+      llvm_unreachable("Comparison with unknown predicate?");
+    case FCmpInst::FCMP_OEQ: return Expression::FCMPOEQ;
+    case FCmpInst::FCMP_OGT: return Expression::FCMPOGT;
+    case FCmpInst::FCMP_OGE: return Expression::FCMPOGE;
+    case FCmpInst::FCMP_OLT: return Expression::FCMPOLT;
+    case FCmpInst::FCMP_OLE: return Expression::FCMPOLE;
+    case FCmpInst::FCMP_ONE: return Expression::FCMPONE;
+    case FCmpInst::FCMP_ORD: return Expression::FCMPORD;
+    case FCmpInst::FCMP_UNO: return Expression::FCMPUNO;
+    case FCmpInst::FCMP_UEQ: return Expression::FCMPUEQ;
+    case FCmpInst::FCMP_UGT: return Expression::FCMPUGT;
+    case FCmpInst::FCMP_UGE: return Expression::FCMPUGE;
+    case FCmpInst::FCMP_ULT: return Expression::FCMPULT;
+    case FCmpInst::FCMP_ULE: return Expression::FCMPULE;
+    case FCmpInst::FCMP_UNE: return Expression::FCMPUNE;
+    }
   }
 }
 
 Expression::ExpressionOpcode ValueTable::getOpcode(CastInst* C) {
   switch(C->getOpcode()) {
   default: // THIS SHOULD NEVER HAPPEN
-    assert(0 && "Cast operator with unknown opcode?");
+    llvm_unreachable("Cast operator with unknown opcode?");
   case Instruction::Trunc:    return Expression::TRUNC;
   case Instruction::ZExt:     return Expression::ZEXT;
   case Instruction::SExt:     return Expression::SEXT;
@@ -282,126 +288,126 @@ Expression::ExpressionOpcode ValueTable::getOpcode(CastInst* C) {
 
 Expression ValueTable::create_expression(CallInst* C) {
   Expression e;
-  
+
   e.type = C->getType();
   e.firstVN = 0;
   e.secondVN = 0;
   e.thirdVN = 0;
   e.function = C->getCalledFunction();
   e.opcode = Expression::CALL;
-  
+
   for (CallInst::op_iterator I = C->op_begin()+1, E = C->op_end();
        I != E; ++I)
     e.varargs.push_back(lookup_or_add(*I));
-  
+
   return e;
 }
 
 Expression ValueTable::create_expression(BinaryOperator* BO) {
   Expression e;
-    
+
   e.firstVN = lookup_or_add(BO->getOperand(0));
   e.secondVN = lookup_or_add(BO->getOperand(1));
   e.thirdVN = 0;
   e.function = 0;
   e.type = BO->getType();
   e.opcode = getOpcode(BO);
-  
+
   return e;
 }
 
 Expression ValueTable::create_expression(CmpInst* C) {
   Expression e;
-    
+
   e.firstVN = lookup_or_add(C->getOperand(0));
   e.secondVN = lookup_or_add(C->getOperand(1));
   e.thirdVN = 0;
   e.function = 0;
   e.type = C->getType();
   e.opcode = getOpcode(C);
-  
+
   return e;
 }
 
 Expression ValueTable::create_expression(CastInst* C) {
   Expression e;
-    
+
   e.firstVN = lookup_or_add(C->getOperand(0));
   e.secondVN = 0;
   e.thirdVN = 0;
   e.function = 0;
   e.type = C->getType();
   e.opcode = getOpcode(C);
-  
+
   return e;
 }
 
 Expression ValueTable::create_expression(ShuffleVectorInst* S) {
   Expression e;
-    
+
   e.firstVN = lookup_or_add(S->getOperand(0));
   e.secondVN = lookup_or_add(S->getOperand(1));
   e.thirdVN = lookup_or_add(S->getOperand(2));
   e.function = 0;
   e.type = S->getType();
   e.opcode = Expression::SHUFFLE;
-  
+
   return e;
 }
 
 Expression ValueTable::create_expression(ExtractElementInst* E) {
   Expression e;
-    
+
   e.firstVN = lookup_or_add(E->getOperand(0));
   e.secondVN = lookup_or_add(E->getOperand(1));
   e.thirdVN = 0;
   e.function = 0;
   e.type = E->getType();
   e.opcode = Expression::EXTRACT;
-  
+
   return e;
 }
 
 Expression ValueTable::create_expression(InsertElementInst* I) {
   Expression e;
-    
+
   e.firstVN = lookup_or_add(I->getOperand(0));
   e.secondVN = lookup_or_add(I->getOperand(1));
   e.thirdVN = lookup_or_add(I->getOperand(2));
   e.function = 0;
   e.type = I->getType();
   e.opcode = Expression::INSERT;
-  
+
   return e;
 }
 
 Expression ValueTable::create_expression(SelectInst* I) {
   Expression e;
-    
+
   e.firstVN = lookup_or_add(I->getCondition());
   e.secondVN = lookup_or_add(I->getTrueValue());
   e.thirdVN = lookup_or_add(I->getFalseValue());
   e.function = 0;
   e.type = I->getType();
   e.opcode = Expression::SELECT;
-  
+
   return e;
 }
 
 Expression ValueTable::create_expression(GetElementPtrInst* G) {
   Expression e;
-  
+
   e.firstVN = lookup_or_add(G->getPointerOperand());
   e.secondVN = 0;
   e.thirdVN = 0;
   e.function = 0;
   e.type = G->getType();
   e.opcode = Expression::GEP;
-  
+
   for (GetElementPtrInst::op_iterator I = G->idx_begin(), E = G->idx_end();
        I != E; ++I)
     e.varargs.push_back(lookup_or_add(*I));
-  
+
   return e;
 }
 
@@ -410,21 +416,21 @@ Expression ValueTable::create_expression(GetElementPtrInst* G) {
 //===----------------------------------------------------------------------===//
 
 /// add - Insert a value into the table with a specified value number.
-void ValueTable::add(Value* V, uint32_t num) {
+void ValueTable::add(Value *V, uint32_t num) {
   valueNumbering.insert(std::make_pair(V, num));
 }
 
 /// lookup_or_add - Returns the value number for the specified value, assigning
 /// it a new number if it did not have one before.
-uint32_t ValueTable::lookup_or_add(Value* V) {
+uint32_t ValueTable::lookup_or_add(Value *V) {
   DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
   if (VI != valueNumbering.end())
     return VI->second;
-  
+
   if (CallInst* C = dyn_cast<CallInst>(V)) {
     if (AA->doesNotAccessMemory(C)) {
       Expression e = create_expression(C);
-    
+
       DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
       if (EI != expressionNumbering.end()) {
         valueNumbering.insert(std::make_pair(V, EI->second));
@@ -432,20 +438,20 @@ uint32_t ValueTable::lookup_or_add(Value* V) {
       } else {
         expressionNumbering.insert(std::make_pair(e, nextValueNumber));
         valueNumbering.insert(std::make_pair(V, nextValueNumber));
-      
+
         return nextValueNumber++;
       }
     } else if (AA->onlyReadsMemory(C)) {
       Expression e = create_expression(C);
-      
+
       if (expressionNumbering.find(e) == expressionNumbering.end()) {
         expressionNumbering.insert(std::make_pair(e, nextValueNumber));
         valueNumbering.insert(std::make_pair(V, nextValueNumber));
         return nextValueNumber++;
       }
-      
+
       MemDepResult local_dep = MD->getDependency(C);
-      
+
       if (!local_dep.isDef() && !local_dep.isNonLocal()) {
         valueNumbering.insert(std::make_pair(V, nextValueNumber));
         return nextValueNumber++;
@@ -453,12 +459,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) {
 
       if (local_dep.isDef()) {
         CallInst* local_cdep = cast<CallInst>(local_dep.getInst());
-        
+
         if (local_cdep->getNumOperands() != C->getNumOperands()) {
           valueNumbering.insert(std::make_pair(V, nextValueNumber));
           return nextValueNumber++;
         }
-          
+
         for (unsigned i = 1; i < C->getNumOperands(); ++i) {
           uint32_t c_vn = lookup_or_add(C->getOperand(i));
           uint32_t cd_vn = lookup_or_add(local_cdep->getOperand(i));
@@ -467,19 +473,19 @@ uint32_t ValueTable::lookup_or_add(Value* V) {
             return nextValueNumber++;
           }
         }
-      
+
         uint32_t v = lookup_or_add(local_cdep);
         valueNumbering.insert(std::make_pair(V, v));
         return v;
       }
 
       // Non-local case.
-      const MemoryDependenceAnalysis::NonLocalDepInfo &deps = 
+      const MemoryDependenceAnalysis::NonLocalDepInfo &deps =
         MD->getNonLocalCallDependency(CallSite(C));
       // FIXME: call/call dependencies for readonly calls should return def, not
       // clobber!  Move the checking logic to MemDep!
       CallInst* cdep = 0;
-      
+
       // Check to see if we have a single dominating call instruction that is
       // identical to C.
       for (unsigned i = 0, e = deps.size(); i != e; ++i) {
@@ -494,23 +500,23 @@ uint32_t ValueTable::lookup_or_add(Value* V) {
           cdep = 0;
           break;
         }
-        
+
         CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->second.getInst());
         // FIXME: All duplicated with non-local case.
         if (NonLocalDepCall && DT->properlyDominates(I->first, C->getParent())){
           cdep = NonLocalDepCall;
           continue;
         }
-        
+
         cdep = 0;
         break;
       }
-      
+
       if (!cdep) {
         valueNumbering.insert(std::make_pair(V, nextValueNumber));
         return nextValueNumber++;
       }
-      
+
       if (cdep->getNumOperands() != C->getNumOperands()) {
         valueNumbering.insert(std::make_pair(V, nextValueNumber));
         return nextValueNumber++;
@@ -523,18 +529,18 @@ uint32_t ValueTable::lookup_or_add(Value* V) {
           return nextValueNumber++;
         }
       }
-      
+
       uint32_t v = lookup_or_add(cdep);
       valueNumbering.insert(std::make_pair(V, v));
       return v;
-      
+
     } else {
       valueNumbering.insert(std::make_pair(V, nextValueNumber));
       return nextValueNumber++;
     }
   } else if (BinaryOperator* BO = dyn_cast<BinaryOperator>(V)) {
     Expression e = create_expression(BO);
-    
+
     DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
     if (EI != expressionNumbering.end()) {
       valueNumbering.insert(std::make_pair(V, EI->second));
@@ -542,12 +548,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) {
     } else {
       expressionNumbering.insert(std::make_pair(e, nextValueNumber));
       valueNumbering.insert(std::make_pair(V, nextValueNumber));
-      
+
       return nextValueNumber++;
     }
   } else if (CmpInst* C = dyn_cast<CmpInst>(V)) {
     Expression e = create_expression(C);
-    
+
     DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
     if (EI != expressionNumbering.end()) {
       valueNumbering.insert(std::make_pair(V, EI->second));
@@ -555,12 +561,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) {
     } else {
       expressionNumbering.insert(std::make_pair(e, nextValueNumber));
       valueNumbering.insert(std::make_pair(V, nextValueNumber));
-      
+
       return nextValueNumber++;
     }
   } else if (ShuffleVectorInst* U = dyn_cast<ShuffleVectorInst>(V)) {
     Expression e = create_expression(U);
-    
+
     DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
     if (EI != expressionNumbering.end()) {
       valueNumbering.insert(std::make_pair(V, EI->second));
@@ -568,12 +574,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) {
     } else {
       expressionNumbering.insert(std::make_pair(e, nextValueNumber));
       valueNumbering.insert(std::make_pair(V, nextValueNumber));
-      
+
       return nextValueNumber++;
     }
   } else if (ExtractElementInst* U = dyn_cast<ExtractElementInst>(V)) {
     Expression e = create_expression(U);
-    
+
     DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
     if (EI != expressionNumbering.end()) {
       valueNumbering.insert(std::make_pair(V, EI->second));
@@ -581,12 +587,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) {
     } else {
       expressionNumbering.insert(std::make_pair(e, nextValueNumber));
       valueNumbering.insert(std::make_pair(V, nextValueNumber));
-      
+
       return nextValueNumber++;
     }
   } else if (InsertElementInst* U = dyn_cast<InsertElementInst>(V)) {
     Expression e = create_expression(U);
-    
+
     DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
     if (EI != expressionNumbering.end()) {
       valueNumbering.insert(std::make_pair(V, EI->second));
@@ -594,12 +600,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) {
     } else {
       expressionNumbering.insert(std::make_pair(e, nextValueNumber));
       valueNumbering.insert(std::make_pair(V, nextValueNumber));
-      
+
       return nextValueNumber++;
     }
   } else if (SelectInst* U = dyn_cast<SelectInst>(V)) {
     Expression e = create_expression(U);
-    
+
     DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
     if (EI != expressionNumbering.end()) {
       valueNumbering.insert(std::make_pair(V, EI->second));
@@ -607,12 +613,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) {
     } else {
       expressionNumbering.insert(std::make_pair(e, nextValueNumber));
       valueNumbering.insert(std::make_pair(V, nextValueNumber));
-      
+
       return nextValueNumber++;
     }
   } else if (CastInst* U = dyn_cast<CastInst>(V)) {
     Expression e = create_expression(U);
-    
+
     DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
     if (EI != expressionNumbering.end()) {
       valueNumbering.insert(std::make_pair(V, EI->second));
@@ -620,12 +626,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) {
     } else {
       expressionNumbering.insert(std::make_pair(e, nextValueNumber));
       valueNumbering.insert(std::make_pair(V, nextValueNumber));
-      
+
       return nextValueNumber++;
     }
   } else if (GetElementPtrInst* U = dyn_cast<GetElementPtrInst>(V)) {
     Expression e = create_expression(U);
-    
+
     DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
     if (EI != expressionNumbering.end()) {
       valueNumbering.insert(std::make_pair(V, EI->second));
@@ -633,7 +639,7 @@ uint32_t ValueTable::lookup_or_add(Value* V) {
     } else {
       expressionNumbering.insert(std::make_pair(e, nextValueNumber));
       valueNumbering.insert(std::make_pair(V, nextValueNumber));
-      
+
       return nextValueNumber++;
     }
   } else {
@@ -644,7 +650,7 @@ uint32_t ValueTable::lookup_or_add(Value* V) {
 
 /// lookup - Returns the value number of the specified value. Fails if
 /// the value has not yet been numbered.
-uint32_t ValueTable::lookup(Value* V) const {
+uint32_t ValueTable::lookup(Value *V) const {
   DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
   assert(VI != valueNumbering.end() && "Value not numbered?");
   return VI->second;
@@ -658,7 +664,7 @@ void ValueTable::clear() {
 }
 
 /// erase - Remove a value from the value numbering
-void ValueTable::erase(Value* V) {
+void ValueTable::erase(Value *V) {
   valueNumbering.erase(V);
 }
 
@@ -676,17 +682,17 @@ void ValueTable::verifyRemoved(const Value *V) const {
 //===----------------------------------------------------------------------===//
 
 namespace {
-  struct VISIBILITY_HIDDEN ValueNumberScope {
+  struct ValueNumberScope {
     ValueNumberScope* parent;
     DenseMap<uint32_t, Value*> table;
-    
+
     ValueNumberScope(ValueNumberScope* p) : parent(p) { }
   };
 }
 
 namespace {
 
-  class VISIBILITY_HIDDEN GVN : public FunctionPass {
+  class GVN : public FunctionPass {
     bool runOnFunction(Function &F);
   public:
     static char ID; // Pass identification, replacement for typeid
@@ -698,45 +704,35 @@ namespace {
 
     ValueTable VN;
     DenseMap<BasicBlock*, ValueNumberScope*> localAvail;
-    
-    typedef DenseMap<Value*, SmallPtrSet<Instruction*, 4> > PhiMapType;
-    PhiMapType phiMap;
-    
-    
+
     // This transformation requires dominator postdominator info
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.addRequired<DominatorTree>();
       AU.addRequired<MemoryDependenceAnalysis>();
       AU.addRequired<AliasAnalysis>();
-      
+
       AU.addPreserved<DominatorTree>();
       AU.addPreserved<AliasAnalysis>();
     }
-  
+
     // Helper fuctions
     // FIXME: eliminate or document these better
     bool processLoad(LoadInst* L,
                      SmallVectorImpl<Instruction*> &toErase);
-    bool processInstruction(Instruction* I,
+    bool processInstruction(Instruction *I,
                             SmallVectorImpl<Instruction*> &toErase);
     bool processNonLocalLoad(LoadInst* L,
                              SmallVectorImpl<Instruction*> &toErase);
-    bool processBlock(BasicBlock* BB);
-    Value *GetValueForBlock(BasicBlock *BB, Instruction* orig,
-                            DenseMap<BasicBlock*, Value*> &Phis,
-                            bool top_level = false);
+    bool processBlock(BasicBlock *BB);
     void dump(DenseMap<uint32_t, Value*>& d);
     bool iterateOnFunction(Function &F);
-    Value* CollapsePhi(PHINode* p);
-    bool isSafeReplacement(PHINode* p, Instruction* inst);
+    Value *CollapsePhi(PHINode* p);
     bool performPRE(Function& F);
-    Value* lookupNumber(BasicBlock* BB, uint32_t num);
-    bool mergeBlockIntoPredecessor(BasicBlock* BB);
-    Value* AttemptRedundancyElimination(Instruction* orig, unsigned valno);
+    Value *lookupNumber(BasicBlock *BB, uint32_t num);
     void cleanupGlobalSets();
     void verifyRemoved(const Instruction *I) const;
   };
-  
+
   char GVN::ID = 0;
 }
 
@@ -756,107 +752,31 @@ void GVN::dump(DenseMap<uint32_t, Value*>& d) {
   printf("}\n");
 }
 
-Value* GVN::CollapsePhi(PHINode* p) {
-  Value* constVal = p->hasConstantValue();
-  if (!constVal) return 0;
-  
-  Instruction* inst = dyn_cast<Instruction>(constVal);
-  if (!inst)
-    return constVal;
-    
-  if (DT->dominates(inst, p))
-    if (isSafeReplacement(p, inst))
-      return inst;
-  return 0;
-}
-
-bool GVN::isSafeReplacement(PHINode* p, Instruction* inst) {
+static bool isSafeReplacement(PHINode* p, Instruction *inst) {
   if (!isa<PHINode>(inst))
     return true;
-  
+
   for (Instruction::use_iterator UI = p->use_begin(), E = p->use_end();
        UI != E; ++UI)
     if (PHINode* use_phi = dyn_cast<PHINode>(UI))
       if (use_phi->getParent() == inst->getParent())
         return false;
-  
+
   return true;
 }
 
-/// GetValueForBlock - Get the value to use within the specified basic block.
-/// available values are in Phis.
-Value *GVN::GetValueForBlock(BasicBlock *BB, Instruction* orig,
-                             DenseMap<BasicBlock*, Value*> &Phis,
-                             bool top_level) { 
-                                 
-  // If we have already computed this value, return the previously computed val.
-  DenseMap<BasicBlock*, Value*>::iterator V = Phis.find(BB);
-  if (V != Phis.end() && !top_level) return V->second;
-  
-  // If the block is unreachable, just return undef, since this path
-  // can't actually occur at runtime.
-  if (!DT->isReachableFromEntry(BB))
-    return Phis[BB] = Context->getUndef(orig->getType());
-  
-  if (BasicBlock *Pred = BB->getSinglePredecessor()) {
-    Value *ret = GetValueForBlock(Pred, orig, Phis);
-    Phis[BB] = ret;
-    return ret;
-  }
+Value *GVN::CollapsePhi(PHINode *PN) {
+  Value *ConstVal = PN->hasConstantValue(DT);
+  if (!ConstVal) return 0;
 
-  // Get the number of predecessors of this block so we can reserve space later.
-  // If there is already a PHI in it, use the #preds from it, otherwise count.
-  // Getting it from the PHI is constant time.
-  unsigned NumPreds;
-  if (PHINode *ExistingPN = dyn_cast<PHINode>(BB->begin()))
-    NumPreds = ExistingPN->getNumIncomingValues();
-  else
-    NumPreds = std::distance(pred_begin(BB), pred_end(BB));
-  
-  // Otherwise, the idom is the loop, so we need to insert a PHI node.  Do so
-  // now, then get values to fill in the incoming values for the PHI.
-  PHINode *PN = PHINode::Create(orig->getType(), orig->getName()+".rle",
-                                BB->begin());
-  PN->reserveOperandSpace(NumPreds);
-  
-  Phis.insert(std::make_pair(BB, PN));
-  
-  // Fill in the incoming values for the block.
-  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
-    Value* val = GetValueForBlock(*PI, orig, Phis);
-    PN->addIncoming(val, *PI);
-  }
-  
-  VN.getAliasAnalysis()->copyValue(orig, PN);
-  
-  // Attempt to collapse PHI nodes that are trivially redundant
-  Value* v = CollapsePhi(PN);
-  if (!v) {
-    // Cache our phi construction results
-    if (LoadInst* L = dyn_cast<LoadInst>(orig))
-      phiMap[L->getPointerOperand()].insert(PN);
-    else
-      phiMap[orig].insert(PN);
-    
-    return PN;
-  }
-    
-  PN->replaceAllUsesWith(v);
-  if (isa<PointerType>(v->getType()))
-    MD->invalidateCachedPointerInfo(v);
-
-  for (DenseMap<BasicBlock*, Value*>::iterator I = Phis.begin(),
-       E = Phis.end(); I != E; ++I)
-    if (I->second == PN)
-      I->second = v;
-
-  DEBUG(cerr << "GVN removed: " << *PN);
-  MD->removeInstruction(PN);
-  PN->eraseFromParent();
-  DEBUG(verifyRemoved(PN));
-
-  Phis[BB] = v;
-  return v;
+  Instruction *Inst = dyn_cast<Instruction>(ConstVal);
+  if (!Inst)
+    return ConstVal;
+
+  if (DT->dominates(Inst, PN))
+    if (isSafeReplacement(PN, Inst))
+      return Inst;
+  return 0;
 }
 
 /// IsValueFullyAvailableInBlock - Return true if we can prove that the value
@@ -869,11 +789,11 @@ Value *GVN::GetValueForBlock(BasicBlock *BB, Instruction* orig,
 ///      currently speculating that it will be.
 ///   3) we are speculating for this block and have used that to speculate for
 ///      other blocks.
-static bool IsValueFullyAvailableInBlock(BasicBlock *BB, 
+static bool IsValueFullyAvailableInBlock(BasicBlock *BB,
                             DenseMap<BasicBlock*, char> &FullyAvailableBlocks) {
   // Optimistically assume that the block is fully available and check to see
   // if we already know about this block in one lookup.
-  std::pair<DenseMap<BasicBlock*, char>::iterator, char> IV = 
+  std::pair<DenseMap<BasicBlock*, char>::iterator, char> IV =
     FullyAvailableBlocks.insert(std::make_pair(BB, 2));
 
   // If the entry already existed for this block, return the precomputed value.
@@ -884,29 +804,29 @@ static bool IsValueFullyAvailableInBlock(BasicBlock *BB,
       IV.first->second = 3;
     return IV.first->second != 0;
   }
-  
+
   // Otherwise, see if it is fully available in all predecessors.
   pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
-  
+
   // If this block has no predecessors, it isn't live-in here.
   if (PI == PE)
     goto SpeculationFailure;
-  
+
   for (; PI != PE; ++PI)
     // If the value isn't fully available in one of our predecessors, then it
     // isn't fully available in this block either.  Undo our previous
     // optimistic assumption and bail out.
     if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks))
       goto SpeculationFailure;
-  
+
   return true;
-  
+
 // SpeculationFailure - If we get here, we found out that this is not, after
 // all, a fully-available block.  We have a problem if we speculated on this and
 // used the speculation to mark other blocks as available.
 SpeculationFailure:
   char &BBVal = FullyAvailableBlocks[BB];
-  
+
   // If we didn't speculate on this, just return with it set to false.
   if (BBVal == 2) {
     BBVal = 0;
@@ -918,7 +838,7 @@ SpeculationFailure:
   // 0 if set to one.
   SmallVector<BasicBlock*, 32> BBWorklist;
   BBWorklist.push_back(BB);
-  
+
   while (!BBWorklist.empty()) {
     BasicBlock *Entry = BBWorklist.pop_back_val();
     // Note that this sets blocks to 0 (unavailable) if they happen to not
@@ -928,24 +848,372 @@ SpeculationFailure:
 
     // Mark as unavailable.
     EntryVal = 0;
-    
+
     for (succ_iterator I = succ_begin(Entry), E = succ_end(Entry); I != E; ++I)
       BBWorklist.push_back(*I);
   }
-  
+
   return false;
 }
 
+
+/// CanCoerceMustAliasedValueToLoad - Return true if
+/// CoerceAvailableValueToLoadType will succeed.
+static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
+                                            const Type *LoadTy,
+                                            const TargetData &TD) {
+  // If the loaded or stored value is an first class array or struct, don't try
+  // to transform them.  We need to be able to bitcast to integer.
+  if (isa<StructType>(LoadTy) || isa<ArrayType>(LoadTy) ||
+      isa<StructType>(StoredVal->getType()) ||
+      isa<ArrayType>(StoredVal->getType()))
+    return false;
+  
+  // The store has to be at least as big as the load.
+  if (TD.getTypeSizeInBits(StoredVal->getType()) <
+        TD.getTypeSizeInBits(LoadTy))
+    return false;
+  
+  return true;
+}
+  
+
+/// CoerceAvailableValueToLoadType - If we saw a store of a value to memory, and
+/// then a load from a must-aliased pointer of a different type, try to coerce
+/// the stored value.  LoadedTy is the type of the load we want to replace and
+/// InsertPt is the place to insert new instructions.
+///
+/// If we can't do it, return null.
+static Value *CoerceAvailableValueToLoadType(Value *StoredVal, 
+                                             const Type *LoadedTy,
+                                             Instruction *InsertPt,
+                                             const TargetData &TD) {
+  if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD))
+    return 0;
+  
+  const Type *StoredValTy = StoredVal->getType();
+  
+  uint64_t StoreSize = TD.getTypeSizeInBits(StoredValTy);
+  uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy);
+  
+  // If the store and reload are the same size, we can always reuse it.
+  if (StoreSize == LoadSize) {
+    if (isa<PointerType>(StoredValTy) && isa<PointerType>(LoadedTy)) {
+      // Pointer to Pointer -> use bitcast.
+      return new BitCastInst(StoredVal, LoadedTy, "", InsertPt);
+    }
+    
+    // Convert source pointers to integers, which can be bitcast.
+    if (isa<PointerType>(StoredValTy)) {
+      StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
+      StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
+    }
+    
+    const Type *TypeToCastTo = LoadedTy;
+    if (isa<PointerType>(TypeToCastTo))
+      TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext());
+    
+    if (StoredValTy != TypeToCastTo)
+      StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt);
+    
+    // Cast to pointer if the load needs a pointer type.
+    if (isa<PointerType>(LoadedTy))
+      StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt);
+    
+    return StoredVal;
+  }
+  
+  // If the loaded value is smaller than the available value, then we can
+  // extract out a piece from it.  If the available value is too small, then we
+  // can't do anything.
+  assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail");
+  
+  // Convert source pointers to integers, which can be manipulated.
+  if (isa<PointerType>(StoredValTy)) {
+    StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
+    StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
+  }
+  
+  // Convert vectors and fp to integer, which can be manipulated.
+  if (!isa<IntegerType>(StoredValTy)) {
+    StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize);
+    StoredVal = new BitCastInst(StoredVal, StoredValTy, "", InsertPt);
+  }
+  
+  // If this is a big-endian system, we need to shift the value down to the low
+  // bits so that a truncate will work.
+  if (TD.isBigEndian()) {
+    Constant *Val = ConstantInt::get(StoredVal->getType(), StoreSize-LoadSize);
+    StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, "tmp", InsertPt);
+  }
+  
+  // Truncate the integer to the right size now.
+  const Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize);
+  StoredVal = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt);
+  
+  if (LoadedTy == NewIntTy)
+    return StoredVal;
+  
+  // If the result is a pointer, inttoptr.
+  if (isa<PointerType>(LoadedTy))
+    return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt);
+  
+  // Otherwise, bitcast.
+  return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt);
+}
+
+/// GetBaseWithConstantOffset - Analyze the specified pointer to see if it can
+/// be expressed as a base pointer plus a constant offset.  Return the base and
+/// offset to the caller.
+static Value *GetBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
+                                        const TargetData &TD) {
+  Operator *PtrOp = dyn_cast<Operator>(Ptr);
+  if (PtrOp == 0) return Ptr;
+  
+  // Just look through bitcasts.
+  if (PtrOp->getOpcode() == Instruction::BitCast)
+    return GetBaseWithConstantOffset(PtrOp->getOperand(0), Offset, TD);
+  
+  // If this is a GEP with constant indices, we can look through it.
+  GEPOperator *GEP = dyn_cast<GEPOperator>(PtrOp);
+  if (GEP == 0 || !GEP->hasAllConstantIndices()) return Ptr;
+  
+  gep_type_iterator GTI = gep_type_begin(GEP);
+  for (User::op_iterator I = GEP->idx_begin(), E = GEP->idx_end(); I != E;
+       ++I, ++GTI) {
+    ConstantInt *OpC = cast<ConstantInt>(*I);
+    if (OpC->isZero()) continue;
+    
+    // Handle a struct and array indices which add their offset to the pointer.
+    if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+      Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
+    } else {
+      uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
+      Offset += OpC->getSExtValue()*Size;
+    }
+  }
+  
+  // Re-sign extend from the pointer size if needed to get overflow edge cases
+  // right.
+  unsigned PtrSize = TD.getPointerSizeInBits();
+  if (PtrSize < 64)
+    Offset = (Offset << (64-PtrSize)) >> (64-PtrSize);
+  
+  return GetBaseWithConstantOffset(GEP->getPointerOperand(), Offset, TD);
+}
+
+
+/// AnalyzeLoadFromClobberingStore - This function is called when we have a
+/// memdep query of a load that ends up being a clobbering store.  This means
+/// that the store *may* provide bits used by the load but we can't be sure
+/// because the pointers don't mustalias.  Check this case to see if there is
+/// anything more we can do before we give up.  This returns -1 if we have to
+/// give up, or a byte number in the stored value of the piece that feeds the
+/// load.
+static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI,
+                                          const TargetData &TD) {
+  // If the loaded or stored value is an first class array or struct, don't try
+  // to transform them.  We need to be able to bitcast to integer.
+  if (isa<StructType>(L->getType()) || isa<ArrayType>(L->getType()) ||
+      isa<StructType>(DepSI->getOperand(0)->getType()) ||
+      isa<ArrayType>(DepSI->getOperand(0)->getType()))
+    return -1;
+  
+  int64_t StoreOffset = 0, LoadOffset = 0;
+  Value *StoreBase = 
+    GetBaseWithConstantOffset(DepSI->getPointerOperand(), StoreOffset, TD);
+  Value *LoadBase = 
+    GetBaseWithConstantOffset(L->getPointerOperand(), LoadOffset, TD);
+  if (StoreBase != LoadBase)
+    return -1;
+  
+  // If the load and store are to the exact same address, they should have been
+  // a must alias.  AA must have gotten confused.
+  // FIXME: Study to see if/when this happens.
+  if (LoadOffset == StoreOffset) {
+#if 0
+    errs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n"
+    << "Base       = " << *StoreBase << "\n"
+    << "Store Ptr  = " << *DepSI->getPointerOperand() << "\n"
+    << "Store Offs = " << StoreOffset << " - " << *DepSI << "\n"
+    << "Load Ptr   = " << *L->getPointerOperand() << "\n"
+    << "Load Offs  = " << LoadOffset << " - " << *L << "\n\n";
+    errs() << "'" << L->getParent()->getParent()->getName() << "'"
+    << *L->getParent();
+#endif
+    return -1;
+  }
+  
+  // If the load and store don't overlap at all, the store doesn't provide
+  // anything to the load.  In this case, they really don't alias at all, AA
+  // must have gotten confused.
+  // FIXME: Investigate cases where this bails out, e.g. rdar://7238614. Then
+  // remove this check, as it is duplicated with what we have below.
+  uint64_t StoreSize = TD.getTypeSizeInBits(DepSI->getOperand(0)->getType());
+  uint64_t LoadSize = TD.getTypeSizeInBits(L->getType());
+  
+  if ((StoreSize & 7) | (LoadSize & 7))
+    return -1;
+  StoreSize >>= 3;  // Convert to bytes.
+  LoadSize >>= 3;
+  
+  
+  bool isAAFailure = false;
+  if (StoreOffset < LoadOffset) {
+    isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset;
+  } else {
+    isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset;
+  }
+  if (isAAFailure) {
+#if 0
+    errs() << "STORE LOAD DEP WITH COMMON BASE:\n"
+    << "Base       = " << *StoreBase << "\n"
+    << "Store Ptr  = " << *DepSI->getPointerOperand() << "\n"
+    << "Store Offs = " << StoreOffset << " - " << *DepSI << "\n"
+    << "Load Ptr   = " << *L->getPointerOperand() << "\n"
+    << "Load Offs  = " << LoadOffset << " - " << *L << "\n\n";
+    errs() << "'" << L->getParent()->getParent()->getName() << "'"
+    << *L->getParent();
+#endif
+    return -1;
+  }
+  
+  // If the Load isn't completely contained within the stored bits, we don't
+  // have all the bits to feed it.  We could do something crazy in the future
+  // (issue a smaller load then merge the bits in) but this seems unlikely to be
+  // valuable.
+  if (StoreOffset > LoadOffset ||
+      StoreOffset+StoreSize < LoadOffset+LoadSize)
+    return -1;
+  
+  // Okay, we can do this transformation.  Return the number of bytes into the
+  // store that the load is.
+  return LoadOffset-StoreOffset;
+}  
+
+
+/// GetStoreValueForLoad - This function is called when we have a
+/// memdep query of a load that ends up being a clobbering store.  This means
+/// that the store *may* provide bits used by the load but we can't be sure
+/// because the pointers don't mustalias.  Check this case to see if there is
+/// anything more we can do before we give up.
+static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
+                                   const Type *LoadTy,
+                                   Instruction *InsertPt, const TargetData &TD){
+  LLVMContext &Ctx = SrcVal->getType()->getContext();
+  
+  uint64_t StoreSize = TD.getTypeSizeInBits(SrcVal->getType())/8;
+  uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;
+  
+  
+  // Compute which bits of the stored value are being used by the load.  Convert
+  // to an integer type to start with.
+  if (isa<PointerType>(SrcVal->getType()))
+    SrcVal = new PtrToIntInst(SrcVal, TD.getIntPtrType(Ctx), "tmp", InsertPt);
+  if (!isa<IntegerType>(SrcVal->getType()))
+    SrcVal = new BitCastInst(SrcVal, IntegerType::get(Ctx, StoreSize*8),
+                             "tmp", InsertPt);
+  
+  // Shift the bits to the least significant depending on endianness.
+  unsigned ShiftAmt;
+  if (TD.isLittleEndian()) {
+    ShiftAmt = Offset*8;
+  } else {
+    ShiftAmt = (StoreSize-LoadSize-Offset)*8;
+  }
+  
+  if (ShiftAmt)
+    SrcVal = BinaryOperator::CreateLShr(SrcVal,
+                ConstantInt::get(SrcVal->getType(), ShiftAmt), "tmp", InsertPt);
+  
+  if (LoadSize != StoreSize)
+    SrcVal = new TruncInst(SrcVal, IntegerType::get(Ctx, LoadSize*8),
+                           "tmp", InsertPt);
+  
+  return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD);
+}
+
+struct AvailableValueInBlock {
+  /// BB - The basic block in question.
+  BasicBlock *BB;
+  /// V - The value that is live out of the block.
+  Value *V;
+  /// Offset - The byte offset in V that is interesting for the load query.
+  unsigned Offset;
+  
+  static AvailableValueInBlock get(BasicBlock *BB, Value *V,
+                                   unsigned Offset = 0) {
+    AvailableValueInBlock Res;
+    Res.BB = BB;
+    Res.V = V;
+    Res.Offset = Offset;
+    return Res;
+  }
+};
+
+/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
+/// construct SSA form, allowing us to eliminate LI.  This returns the value
+/// that should be used at LI's definition site.
+static Value *ConstructSSAForLoadSet(LoadInst *LI, 
+                         SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
+                                     const TargetData *TD,
+                                     AliasAnalysis *AA) {
+  SmallVector<PHINode*, 8> NewPHIs;
+  SSAUpdater SSAUpdate(&NewPHIs);
+  SSAUpdate.Initialize(LI);
+  
+  const Type *LoadTy = LI->getType();
+  
+  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
+    BasicBlock *BB = ValuesPerBlock[i].BB;
+    Value *AvailableVal = ValuesPerBlock[i].V;
+    unsigned Offset = ValuesPerBlock[i].Offset;
+    
+    if (SSAUpdate.HasValueForBlock(BB))
+      continue;
+    
+    if (AvailableVal->getType() != LoadTy) {
+      assert(TD && "Need target data to handle type mismatch case");
+      AvailableVal = GetStoreValueForLoad(AvailableVal, Offset, LoadTy,
+                                          BB->getTerminator(), *TD);
+      
+      if (Offset) {
+        DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\n"
+              << *ValuesPerBlock[i].V << '\n'
+              << *AvailableVal << '\n' << "\n\n\n");
+      }
+      
+      
+      DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\n"
+            << *ValuesPerBlock[i].V << '\n'
+            << *AvailableVal << '\n' << "\n\n\n");
+    }
+    
+    SSAUpdate.AddAvailableValue(BB, AvailableVal);
+  }
+  
+  // Perform PHI construction.
+  Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());
+  
+  // If new PHI nodes were created, notify alias analysis.
+  if (isa<PointerType>(V->getType()))
+    for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
+      AA->copyValue(LI, NewPHIs[i]);
+
+  return V;
+}
+
 /// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
 /// non-local by performing PHI construction.
 bool GVN::processNonLocalLoad(LoadInst *LI,
                               SmallVectorImpl<Instruction*> &toErase) {
   // Find the non-local dependencies of the load.
-  SmallVector<MemoryDependenceAnalysis::NonLocalDepEntry, 64> Deps; 
+  SmallVector<MemoryDependenceAnalysis::NonLocalDepEntry, 64> Deps;
   MD->getNonLocalPointerDependency(LI->getOperand(0), true, LI->getParent(),
                                    Deps);
-  //DEBUG(cerr << "INVESTIGATING NONLOCAL LOAD: " << Deps.size() << *LI);
-  
+  //DEBUG(errs() << "INVESTIGATING NONLOCAL LOAD: "
+  //             << Deps.size() << *LI << '\n');
+
   // If we had to process more than one hundred blocks to find the
   // dependencies, this load isn't worth worrying about.  Optimizing
   // it will be too expensive.
@@ -956,106 +1224,124 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
   // clobber in the current block.  Reject this early.
   if (Deps.size() == 1 && Deps[0].second.isClobber()) {
     DEBUG(
-      DOUT << "GVN: non-local load ";
-      WriteAsOperand(*DOUT.stream(), LI);
-      DOUT << " is clobbered by " << *Deps[0].second.getInst();
+      errs() << "GVN: non-local load ";
+      WriteAsOperand(errs(), LI);
+      errs() << " is clobbered by " << *Deps[0].second.getInst() << '\n';
     );
     return false;
   }
-  
+
   // Filter out useless results (non-locals, etc).  Keep track of the blocks
   // where we have a value available in repl, also keep track of whether we see
   // dependencies that produce an unknown value for the load (such as a call
   // that could potentially clobber the load).
-  SmallVector<std::pair<BasicBlock*, Value*>, 16> ValuesPerBlock;
+  SmallVector<AvailableValueInBlock, 16> ValuesPerBlock;
   SmallVector<BasicBlock*, 16> UnavailableBlocks;
+
+  const TargetData *TD = 0;
   
   for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
     BasicBlock *DepBB = Deps[i].first;
     MemDepResult DepInfo = Deps[i].second;
-    
+
     if (DepInfo.isClobber()) {
+      // If the dependence is to a store that writes to a superset of the bits
+      // read by the load, we can extract the bits we need for the load from the
+      // stored value.
+      if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
+        if (TD == 0)
+          TD = getAnalysisIfAvailable<TargetData>();
+        if (TD) {
+          int Offset = AnalyzeLoadFromClobberingStore(LI, DepSI, *TD);
+          if (Offset != -1) {
+            ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
+                                                           DepSI->getOperand(0),
+                                                                Offset));
+            continue;
+          }
+        }
+      }
+      
+      // FIXME: Handle memset/memcpy.
       UnavailableBlocks.push_back(DepBB);
       continue;
     }
-    
+
     Instruction *DepInst = DepInfo.getInst();
-    
+
     // Loading the allocation -> undef.
-    if (isa<AllocationInst>(DepInst)) {
-      ValuesPerBlock.push_back(std::make_pair(DepBB, 
-                                            Context->getUndef(LI->getType())));
+    if (isa<AllocationInst>(DepInst) || isMalloc(DepInst)) {
+      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
+                                             UndefValue::get(LI->getType())));
       continue;
     }
-  
-    if (StoreInst* S = dyn_cast<StoreInst>(DepInst)) {
-      // Reject loads and stores that are to the same address but are of 
-      // different types.
-      // NOTE: 403.gcc does have this case (e.g. in readonly_fields_p) because
-      // of bitfield access, it would be interesting to optimize for it at some
-      // point.
+
+    if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
+      // Reject loads and stores that are to the same address but are of
+      // different types if we have to.
       if (S->getOperand(0)->getType() != LI->getType()) {
-        UnavailableBlocks.push_back(DepBB);
-        continue;
+        if (TD == 0)
+          TD = getAnalysisIfAvailable<TargetData>();
+        
+        // If the stored value is larger or equal to the loaded value, we can
+        // reuse it.
+        if (TD == 0 || !CanCoerceMustAliasedValueToLoad(S->getOperand(0),
+                                                        LI->getType(), *TD)) {
+          UnavailableBlocks.push_back(DepBB);
+          continue;
+        }
       }
-      
-      ValuesPerBlock.push_back(std::make_pair(DepBB, S->getOperand(0)));
-      
-    } else if (LoadInst* LD = dyn_cast<LoadInst>(DepInst)) {
+
+      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
+                                                          S->getOperand(0)));
+      continue;
+    }
+    
+    if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
+      // If the types mismatch and we can't handle it, reject reuse of the load.
       if (LD->getType() != LI->getType()) {
-        UnavailableBlocks.push_back(DepBB);
-        continue;
+        if (TD == 0)
+          TD = getAnalysisIfAvailable<TargetData>();
+        
+        // If the stored value is larger or equal to the loaded value, we can
+        // reuse it.
+        if (TD == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*TD)){
+          UnavailableBlocks.push_back(DepBB);
+          continue;
+        }          
       }
-      ValuesPerBlock.push_back(std::make_pair(DepBB, LD));
-    } else {
-      UnavailableBlocks.push_back(DepBB);
+      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, LD));
       continue;
     }
+    
+    UnavailableBlocks.push_back(DepBB);
+    continue;
   }
-  
+
   // If we have no predecessors that produce a known value for this load, exit
   // early.
   if (ValuesPerBlock.empty()) return false;
-  
+
   // If all of the instructions we depend on produce a known value for this
   // load, then it is fully redundant and we can use PHI insertion to compute
   // its value.  Insert PHIs and remove the fully redundant value now.
   if (UnavailableBlocks.empty()) {
-    // Use cached PHI construction information from previous runs
-    SmallPtrSet<Instruction*, 4> &p = phiMap[LI->getPointerOperand()];
-    // FIXME: What does phiMap do? Are we positive it isn't getting invalidated?
-    for (SmallPtrSet<Instruction*, 4>::iterator I = p.begin(), E = p.end();
-         I != E; ++I) {
-      if ((*I)->getParent() == LI->getParent()) {
-        DEBUG(cerr << "GVN REMOVING NONLOCAL LOAD #1: " << *LI);
-        LI->replaceAllUsesWith(*I);
-        if (isa<PointerType>((*I)->getType()))
-          MD->invalidateCachedPointerInfo(*I);
-        toErase.push_back(LI);
-        NumGVNLoad++;
-        return true;
-      }
-      
-      ValuesPerBlock.push_back(std::make_pair((*I)->getParent(), *I));
-    }
-    
-    DEBUG(cerr << "GVN REMOVING NONLOCAL LOAD: " << *LI);
+    DEBUG(errs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');
     
-    DenseMap<BasicBlock*, Value*> BlockReplValues;
-    BlockReplValues.insert(ValuesPerBlock.begin(), ValuesPerBlock.end());
     // Perform PHI construction.
-    Value* v = GetValueForBlock(LI->getParent(), LI, BlockReplValues, true);
-    LI->replaceAllUsesWith(v);
-    
-    if (isa<PHINode>(v))
-      v->takeName(LI);
-    if (isa<PointerType>(v->getType()))
-      MD->invalidateCachedPointerInfo(v);
+    Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD,
+                                      VN.getAliasAnalysis());
+    LI->replaceAllUsesWith(V);
+
+    if (isa<PHINode>(V))
+      V->takeName(LI);
+    if (isa<PointerType>(V->getType()))
+      MD->invalidateCachedPointerInfo(V);
     toErase.push_back(LI);
     NumGVNLoad++;
     return true;
   }
-  
+
   if (!EnablePRE || !EnableLoadPRE)
     return false;
 
@@ -1066,7 +1352,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
   // prefer to not increase code size.  As such, we only do this when we know
   // that we only have to insert *one* load (which means we're basically moving
   // the load, not inserting a new one).
-  
+
   SmallPtrSet<BasicBlock *, 4> Blockers;
   for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
     Blockers.insert(UnavailableBlocks[i]);
@@ -1090,28 +1376,28 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
     if (TmpBB->getTerminator()->getNumSuccessors() != 1)
       allSingleSucc = false;
   }
-  
+
   assert(TmpBB);
   LoadBB = TmpBB;
-  
+
   // If we have a repl set with LI itself in it, this means we have a loop where
   // at least one of the values is LI.  Since this means that we won't be able
   // to eliminate LI even if we insert uses in the other predecessors, we will
   // end up increasing code size.  Reject this by scanning for LI.
   for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
-    if (ValuesPerBlock[i].second == LI)
+    if (ValuesPerBlock[i].V == LI)
       return false;
-  
+
   if (isSinglePred) {
     bool isHot = false;
     for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
-      if (Instruction *I = dyn_cast<Instruction>(ValuesPerBlock[i].second))
-	// "Hot" Instruction is in some loop (because it dominates its dep. 
-	// instruction).
-	if (DT->dominates(LI, I)) { 
-	  isHot = true;
-	  break;
-	}
+      if (Instruction *I = dyn_cast<Instruction>(ValuesPerBlock[i].V))
+        // "Hot" Instruction is in some loop (because it dominates its dep.
+        // instruction).
+        if (DT->dominates(LI, I)) {
+          isHot = true;
+          break;
+        }
 
     // We are interested only in "hot" instructions. We don't want to do any
     // mis-optimizations here.
@@ -1128,7 +1414,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
 
   DenseMap<BasicBlock*, char> FullyAvailableBlocks;
   for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
-    FullyAvailableBlocks[ValuesPerBlock[i].first] = true;
+    FullyAvailableBlocks[ValuesPerBlock[i].BB] = true;
   for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
     FullyAvailableBlocks[UnavailableBlocks[i]] = false;
 
@@ -1136,33 +1422,33 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
        PI != E; ++PI) {
     if (IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks))
       continue;
-    
+
     // If this load is not available in multiple predecessors, reject it.
     if (UnavailablePred && UnavailablePred != *PI)
       return false;
     UnavailablePred = *PI;
   }
-  
+
   assert(UnavailablePred != 0 &&
          "Fully available value should be eliminated above!");
-  
+
   // If the loaded pointer is PHI node defined in this block, do PHI translation
   // to get its value in the predecessor.
   Value *LoadPtr = LI->getOperand(0)->DoPHITranslation(LoadBB, UnavailablePred);
-  
+
   // Make sure the value is live in the predecessor.  If it was defined by a
   // non-PHI instruction in this block, we don't know how to recompute it above.
   if (Instruction *LPInst = dyn_cast<Instruction>(LoadPtr))
     if (!DT->dominates(LPInst->getParent(), UnavailablePred)) {
-      DEBUG(cerr << "COULDN'T PRE LOAD BECAUSE PTR IS UNAVAILABLE IN PRED: "
-                 << *LPInst << *LI << "\n");
+      DEBUG(errs() << "COULDN'T PRE LOAD BECAUSE PTR IS UNAVAILABLE IN PRED: "
+                   << *LPInst << '\n' << *LI << "\n");
       return false;
     }
-  
+
   // We don't currently handle critical edges :(
   if (UnavailablePred->getTerminator()->getNumSuccessors() != 1) {
-    DEBUG(cerr << "COULD NOT PRE LOAD BECAUSE OF CRITICAL EDGE '"
-                << UnavailablePred->getName() << "': " << *LI);
+    DEBUG(errs() << "COULD NOT PRE LOAD BECAUSE OF CRITICAL EDGE '"
+                 << UnavailablePred->getName() << "': " << *LI << '\n');
     return false;
   }
 
@@ -1182,28 +1468,23 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
   // Okay, we can eliminate this load by inserting a reload in the predecessor
   // and using PHI construction to get the value in the other predecessors, do
   // it.
-  DEBUG(cerr << "GVN REMOVING PRE LOAD: " << *LI);
-  
+  DEBUG(errs() << "GVN REMOVING PRE LOAD: " << *LI << '\n');
+
   Value *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
                                 LI->getAlignment(),
                                 UnavailablePred->getTerminator());
-  
-  SmallPtrSet<Instruction*, 4> &p = phiMap[LI->getPointerOperand()];
-  for (SmallPtrSet<Instruction*, 4>::iterator I = p.begin(), E = p.end();
-       I != E; ++I)
-    ValuesPerBlock.push_back(std::make_pair((*I)->getParent(), *I));
-  
-  DenseMap<BasicBlock*, Value*> BlockReplValues;
-  BlockReplValues.insert(ValuesPerBlock.begin(), ValuesPerBlock.end());
-  BlockReplValues[UnavailablePred] = NewLoad;
-  
+
+  // Add the newly created load.
+  ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,NewLoad));
+
   // Perform PHI construction.
-  Value* v = GetValueForBlock(LI->getParent(), LI, BlockReplValues, true);
-  LI->replaceAllUsesWith(v);
-  if (isa<PHINode>(v))
-    v->takeName(LI);
-  if (isa<PointerType>(v->getType()))
-    MD->invalidateCachedPointerInfo(v);
+  Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD,
+                                    VN.getAliasAnalysis());
+  LI->replaceAllUsesWith(V);
+  if (isa<PHINode>(V))
+    V->takeName(LI);
+  if (isa<PointerType>(V->getType()))
+    MD->invalidateCachedPointerInfo(V);
   toErase.push_back(LI);
   NumPRELoad++;
   return true;
@@ -1214,64 +1495,119 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
 bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
   if (L->isVolatile())
     return false;
-  
-  Value* pointer = L->getPointerOperand();
 
   // ... to a pointer that has been loaded from before...
-  MemDepResult dep = MD->getDependency(L);
-  
+  MemDepResult Dep = MD->getDependency(L);
+
   // If the value isn't available, don't do anything!
-  if (dep.isClobber()) {
+  if (Dep.isClobber()) {
+    // FIXME: We should handle memset/memcpy/memmove as dependent instructions
+    // to forward the value if available.
+    //if (isa<MemIntrinsic>(Dep.getInst()))
+    //errs() << "LOAD DEPENDS ON MEM: " << *L << "\n" << *Dep.getInst()<<"\n\n";
+    
+    // Check to see if we have something like this:
+    //   store i32 123, i32* %P
+    //   %A = bitcast i32* %P to i8*
+    //   %B = gep i8* %A, i32 1
+    //   %C = load i8* %B
+    //
+    // We could do that by recognizing if the clobber instructions are obviously
+    // a common base + constant offset, and if the previous store (or memset)
+    // completely covers this load.  This sort of thing can happen in bitfield
+    // access code.
+    if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst()))
+      if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) {
+        int Offset = AnalyzeLoadFromClobberingStore(L, DepSI, *TD);
+        if (Offset != -1) {
+          Value *AvailVal = GetStoreValueForLoad(DepSI->getOperand(0), Offset,
+                                                 L->getType(), L, *TD);
+          DEBUG(errs() << "GVN COERCED STORE BITS:\n" << *DepSI << '\n'
+                       << *AvailVal << '\n' << *L << "\n\n\n");
+    
+          // Replace the load!
+          L->replaceAllUsesWith(AvailVal);
+          if (isa<PointerType>(AvailVal->getType()))
+            MD->invalidateCachedPointerInfo(AvailVal);
+          toErase.push_back(L);
+          NumGVNLoad++;
+          return true;
+        }
+      }
+    
     DEBUG(
       // fast print dep, using operator<< on instruction would be too slow
-      DOUT << "GVN: load ";
-      WriteAsOperand(*DOUT.stream(), L);
-      Instruction *I = dep.getInst();
-      DOUT << " is clobbered by " << *I;
+      errs() << "GVN: load ";
+      WriteAsOperand(errs(), L);
+      Instruction *I = Dep.getInst();
+      errs() << " is clobbered by " << *I << '\n';
     );
     return false;
   }
 
   // If it is defined in another block, try harder.
-  if (dep.isNonLocal())
+  if (Dep.isNonLocal())
     return processNonLocalLoad(L, toErase);
 
-  Instruction *DepInst = dep.getInst();
+  Instruction *DepInst = Dep.getInst();
   if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
-    // Only forward substitute stores to loads of the same type.
-    // FIXME: Could do better!
-    if (DepSI->getPointerOperand()->getType() != pointer->getType())
-      return false;
+    Value *StoredVal = DepSI->getOperand(0);
     
+    // The store and load are to a must-aliased pointer, but they may not
+    // actually have the same type.  See if we know how to reuse the stored
+    // value (depending on its type).
+    const TargetData *TD = 0;
+    if (StoredVal->getType() != L->getType() &&
+        (TD = getAnalysisIfAvailable<TargetData>())) {
+      StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(),
+                                                 L, *TD);
+      if (StoredVal == 0)
+        return false;
+      
+      DEBUG(errs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal
+                   << '\n' << *L << "\n\n\n");
+    }
+
     // Remove it!
-    L->replaceAllUsesWith(DepSI->getOperand(0));
-    if (isa<PointerType>(DepSI->getOperand(0)->getType()))
-      MD->invalidateCachedPointerInfo(DepSI->getOperand(0));
+    L->replaceAllUsesWith(StoredVal);
+    if (isa<PointerType>(StoredVal->getType()))
+      MD->invalidateCachedPointerInfo(StoredVal);
     toErase.push_back(L);
     NumGVNLoad++;
     return true;
   }
 
   if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) {
-    // Only forward substitute stores to loads of the same type.
-    // FIXME: Could do better! load i32 -> load i8 -> truncate on little endian.
-    if (DepLI->getType() != L->getType())
-      return false;
+    Value *AvailableVal = DepLI;
+    
+    // The loads are of a must-aliased pointer, but they may not actually have
+    // the same type.  See if we know how to reuse the previously loaded value
+    // (depending on its type).
+    const TargetData *TD = 0;
+    if (DepLI->getType() != L->getType() &&
+        (TD = getAnalysisIfAvailable<TargetData>())) {
+      AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L,*TD);
+      if (AvailableVal == 0)
+        return false;
+      
+      DEBUG(errs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal
+                   << "\n" << *L << "\n\n\n");
+    }
     
     // Remove it!
-    L->replaceAllUsesWith(DepLI);
+    L->replaceAllUsesWith(AvailableVal);
     if (isa<PointerType>(DepLI->getType()))
       MD->invalidateCachedPointerInfo(DepLI);
     toErase.push_back(L);
     NumGVNLoad++;
     return true;
   }
-  
+
   // If this load really doesn't depend on anything, then we must be loading an
   // undef value.  This can happen when loading for a fresh allocation with no
   // intervening stores, for example.
-  if (isa<AllocationInst>(DepInst)) {
-    L->replaceAllUsesWith(Context->getUndef(L->getType()));
+  if (isa<AllocationInst>(DepInst) || isMalloc(DepInst)) {
+    L->replaceAllUsesWith(UndefValue::get(L->getType()));
     toErase.push_back(L);
     NumGVNLoad++;
     return true;
@@ -1280,150 +1616,93 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
   return false;
 }
 
-Value* GVN::lookupNumber(BasicBlock* BB, uint32_t num) {
+Value *GVN::lookupNumber(BasicBlock *BB, uint32_t num) {
   DenseMap<BasicBlock*, ValueNumberScope*>::iterator I = localAvail.find(BB);
   if (I == localAvail.end())
     return 0;
-  
-  ValueNumberScope* locals = I->second;
-  
-  while (locals) {
-    DenseMap<uint32_t, Value*>::iterator I = locals->table.find(num);
-    if (I != locals->table.end())
+
+  ValueNumberScope *Locals = I->second;
+  while (Locals) {
+    DenseMap<uint32_t, Value*>::iterator I = Locals->table.find(num);
+    if (I != Locals->table.end())
       return I->second;
-    else
-      locals = locals->parent;
+    Locals = Locals->parent;
   }
-  
+
   return 0;
 }
 
-/// AttemptRedundancyElimination - If the "fast path" of redundancy elimination
-/// by inheritance from the dominator fails, see if we can perform phi 
-/// construction to eliminate the redundancy.
-Value* GVN::AttemptRedundancyElimination(Instruction* orig, unsigned valno) {
-  BasicBlock* BaseBlock = orig->getParent();
-  
-  SmallPtrSet<BasicBlock*, 4> Visited;
-  SmallVector<BasicBlock*, 8> Stack;
-  Stack.push_back(BaseBlock);
-  
-  DenseMap<BasicBlock*, Value*> Results;
-  
-  // Walk backwards through our predecessors, looking for instances of the
-  // value number we're looking for.  Instances are recorded in the Results
-  // map, which is then used to perform phi construction.
-  while (!Stack.empty()) {
-    BasicBlock* Current = Stack.back();
-    Stack.pop_back();
-    
-    // If we've walked all the way to a proper dominator, then give up. Cases
-    // where the instance is in the dominator will have been caught by the fast
-    // path, and any cases that require phi construction further than this are
-    // probably not worth it anyways.  Note that this is a SIGNIFICANT compile
-    // time improvement.
-    if (DT->properlyDominates(Current, orig->getParent())) return 0;
-    
-    DenseMap<BasicBlock*, ValueNumberScope*>::iterator LA =
-                                                       localAvail.find(Current);
-    if (LA == localAvail.end()) return 0;
-    DenseMap<uint32_t, Value*>::iterator V = LA->second->table.find(valno);
-    
-    if (V != LA->second->table.end()) {
-      // Found an instance, record it.
-      Results.insert(std::make_pair(Current, V->second));
-      continue;
-    }
-    
-    // If we reach the beginning of the function, then give up.
-    if (pred_begin(Current) == pred_end(Current))
-      return 0;
-    
-    for (pred_iterator PI = pred_begin(Current), PE = pred_end(Current);
-         PI != PE; ++PI)
-      if (Visited.insert(*PI))
-        Stack.push_back(*PI);
-  }
-  
-  // If we didn't find instances, give up.  Otherwise, perform phi construction.
-  if (Results.size() == 0)
-    return 0;
-  else
-    return GetValueForBlock(BaseBlock, orig, Results, true);
-}
 
 /// processInstruction - When calculating availability, handle an instruction
 /// by inserting it into the appropriate sets
 bool GVN::processInstruction(Instruction *I,
                              SmallVectorImpl<Instruction*> &toErase) {
-  if (LoadInst* L = dyn_cast<LoadInst>(I)) {
-    bool changed = processLoad(L, toErase);
-    
-    if (!changed) {
-      unsigned num = VN.lookup_or_add(L);
-      localAvail[I->getParent()]->table.insert(std::make_pair(num, L));
+  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+    bool Changed = processLoad(LI, toErase);
+
+    if (!Changed) {
+      unsigned Num = VN.lookup_or_add(LI);
+      localAvail[I->getParent()]->table.insert(std::make_pair(Num, LI));
     }
-    
-    return changed;
+
+    return Changed;
   }
-  
-  uint32_t nextNum = VN.getNextUnusedValueNumber();
-  unsigned num = VN.lookup_or_add(I);
-  
-  if (BranchInst* BI = dyn_cast<BranchInst>(I)) {
-    localAvail[I->getParent()]->table.insert(std::make_pair(num, I));
-    
+
+  uint32_t NextNum = VN.getNextUnusedValueNumber();
+  unsigned Num = VN.lookup_or_add(I);
+
+  if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
+    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
+
     if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
       return false;
-    
-    Value* branchCond = BI->getCondition();
-    uint32_t condVN = VN.lookup_or_add(branchCond);
-    
-    BasicBlock* trueSucc = BI->getSuccessor(0);
-    BasicBlock* falseSucc = BI->getSuccessor(1);
-    
-    if (trueSucc->getSinglePredecessor())
-      localAvail[trueSucc]->table[condVN] = Context->getConstantIntTrue();
-    if (falseSucc->getSinglePredecessor())
-      localAvail[falseSucc]->table[condVN] = Context->getConstantIntFalse();
+
+    Value *BranchCond = BI->getCondition();
+    uint32_t CondVN = VN.lookup_or_add(BranchCond);
+
+    BasicBlock *TrueSucc = BI->getSuccessor(0);
+    BasicBlock *FalseSucc = BI->getSuccessor(1);
+
+    if (TrueSucc->getSinglePredecessor())
+      localAvail[TrueSucc]->table[CondVN] =
+        ConstantInt::getTrue(TrueSucc->getContext());
+    if (FalseSucc->getSinglePredecessor())
+      localAvail[FalseSucc]->table[CondVN] =
+        ConstantInt::getFalse(TrueSucc->getContext());
 
     return false;
-    
+
   // Allocations are always uniquely numbered, so we can save time and memory
-  // by fast failing them.  
+  // by fast failing them.
   } else if (isa<AllocationInst>(I) || isa<TerminatorInst>(I)) {
-    localAvail[I->getParent()]->table.insert(std::make_pair(num, I));
+    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
     return false;
   }
-  
+
   // Collapse PHI nodes
   if (PHINode* p = dyn_cast<PHINode>(I)) {
-    Value* constVal = CollapsePhi(p);
-    
+    Value *constVal = CollapsePhi(p);
+
     if (constVal) {
-      for (PhiMapType::iterator PI = phiMap.begin(), PE = phiMap.end();
-           PI != PE; ++PI)
-        PI->second.erase(p);
-        
       p->replaceAllUsesWith(constVal);
       if (isa<PointerType>(constVal->getType()))
         MD->invalidateCachedPointerInfo(constVal);
       VN.erase(p);
-      
+
       toErase.push_back(p);
     } else {
-      localAvail[I->getParent()]->table.insert(std::make_pair(num, I));
+      localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
     }
-  
+
   // If the number we were assigned was a brand new VN, then we don't
   // need to do a lookup to see if the number already exists
   // somewhere in the domtree: it can't!
-  } else if (num == nextNum) {
-    localAvail[I->getParent()]->table.insert(std::make_pair(num, I));
-    
+  } else if (Num == NextNum) {
+    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
+
   // Perform fast-path value-number based elimination of values inherited from
   // dominators.
-  } else if (Value* repl = lookupNumber(I->getParent(), num)) {
+  } else if (Value *repl = lookupNumber(I->getParent(), Num)) {
     // Remove it!
     VN.erase(I);
     I->replaceAllUsesWith(repl);
@@ -1432,21 +1711,10 @@ bool GVN::processInstruction(Instruction *I,
     toErase.push_back(I);
     return true;
 
-#if 0
-  // Perform slow-pathvalue-number based elimination with phi construction.
-  } else if (Value* repl = AttemptRedundancyElimination(I, num)) {
-    // Remove it!
-    VN.erase(I);
-    I->replaceAllUsesWith(repl);
-    if (isa<PointerType>(repl->getType()))
-      MD->invalidateCachedPointerInfo(repl);
-    toErase.push_back(I);
-    return true;
-#endif
   } else {
-    localAvail[I->getParent()]->table.insert(std::make_pair(num, I));
+    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
   }
-  
+
   return false;
 }
 
@@ -1457,35 +1725,35 @@ bool GVN::runOnFunction(Function& F) {
   VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
   VN.setMemDep(MD);
   VN.setDomTree(DT);
-  
-  bool changed = false;
-  bool shouldContinue = true;
-  
+
+  bool Changed = false;
+  bool ShouldContinue = true;
+
   // Merge unconditional branches, allowing PRE to catch more
   // optimization opportunities.
   for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
-    BasicBlock* BB = FI;
+    BasicBlock *BB = FI;
     ++FI;
     bool removedBlock = MergeBlockIntoPredecessor(BB, this);
     if (removedBlock) NumGVNBlocks++;
-    
-    changed |= removedBlock;
+
+    Changed |= removedBlock;
   }
-  
+
   unsigned Iteration = 0;
-  
-  while (shouldContinue) {
-    DEBUG(cerr << "GVN iteration: " << Iteration << "\n");
-    shouldContinue = iterateOnFunction(F);
-    changed |= shouldContinue;
+
+  while (ShouldContinue) {
+    DEBUG(errs() << "GVN iteration: " << Iteration << "\n");
+    ShouldContinue = iterateOnFunction(F);
+    Changed |= ShouldContinue;
     ++Iteration;
   }
-  
+
   if (EnablePRE) {
     bool PREChanged = true;
     while (PREChanged) {
       PREChanged = performPRE(F);
-      changed |= PREChanged;
+      Changed |= PREChanged;
     }
   }
   // FIXME: Should perform GVN again after PRE does something.  PRE can move
@@ -1495,27 +1763,27 @@ bool GVN::runOnFunction(Function& F) {
 
   cleanupGlobalSets();
 
-  return changed;
+  return Changed;
 }
 
 
-bool GVN::processBlock(BasicBlock* BB) {
+bool GVN::processBlock(BasicBlock *BB) {
   // FIXME: Kill off toErase by doing erasing eagerly in a helper function (and
   // incrementing BI before processing an instruction).
   SmallVector<Instruction*, 8> toErase;
-  bool changed_function = false;
-  
+  bool ChangedFunction = false;
+
   for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
        BI != BE;) {
-    changed_function |= processInstruction(BI, toErase);
+    ChangedFunction |= processInstruction(BI, toErase);
     if (toErase.empty()) {
       ++BI;
       continue;
     }
-    
+
     // If we need some instructions deleted, do it now.
     NumGVNInstr += toErase.size();
-    
+
     // Avoid iterator invalidation.
     bool AtStart = BI == BB->begin();
     if (!AtStart)
@@ -1523,7 +1791,7 @@ bool GVN::processBlock(BasicBlock* BB) {
 
     for (SmallVector<Instruction*, 4>::iterator I = toErase.begin(),
          E = toErase.end(); I != E; ++I) {
-      DEBUG(cerr << "GVN removed: " << **I);
+      DEBUG(errs() << "GVN removed: " << **I << '\n');
       MD->removeInstruction(*I);
       (*I)->eraseFromParent();
       DEBUG(verifyRemoved(*I));
@@ -1535,8 +1803,8 @@ bool GVN::processBlock(BasicBlock* BB) {
     else
       ++BI;
   }
-  
-  return changed_function;
+
+  return ChangedFunction;
 }
 
 /// performPRE - Perform a purely local form of PRE that looks for diamond
@@ -1547,32 +1815,33 @@ bool GVN::performPRE(Function& F) {
   DenseMap<BasicBlock*, Value*> predMap;
   for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
        DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
-    BasicBlock* CurrentBlock = *DI;
-    
+    BasicBlock *CurrentBlock = *DI;
+
     // Nothing to PRE in the entry block.
     if (CurrentBlock == &F.getEntryBlock()) continue;
-    
+
     for (BasicBlock::iterator BI = CurrentBlock->begin(),
          BE = CurrentBlock->end(); BI != BE; ) {
       Instruction *CurInst = BI++;
 
-      if (isa<AllocationInst>(CurInst) || isa<TerminatorInst>(CurInst) ||
-          isa<PHINode>(CurInst) || (CurInst->getType() == Type::VoidTy) ||
+      if (isa<AllocationInst>(CurInst) ||
+          isa<TerminatorInst>(CurInst) || isa<PHINode>(CurInst) ||
+          CurInst->getType()->isVoidTy() ||
           CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
           isa<DbgInfoIntrinsic>(CurInst))
         continue;
 
-      uint32_t valno = VN.lookup(CurInst);
-      
+      uint32_t ValNo = VN.lookup(CurInst);
+
       // Look for the predecessors for PRE opportunities.  We're
       // only trying to solve the basic diamond case, where
       // a value is computed in the successor and one predecessor,
       // but not the other.  We also explicitly disallow cases
       // where the successor is its own predecessor, because they're
       // more complicated to get right.
-      unsigned numWith = 0;
-      unsigned numWithout = 0;
-      BasicBlock* PREPred = 0;
+      unsigned NumWith = 0;
+      unsigned NumWithout = 0;
+      BasicBlock *PREPred = 0;
       predMap.clear();
 
       for (pred_iterator PI = pred_begin(CurrentBlock),
@@ -1581,59 +1850,59 @@ bool GVN::performPRE(Function& F) {
         // own predecessor, on in blocks with predecessors
         // that are not reachable.
         if (*PI == CurrentBlock) {
-          numWithout = 2;
+          NumWithout = 2;
           break;
         } else if (!localAvail.count(*PI))  {
-          numWithout = 2;
+          NumWithout = 2;
           break;
         }
-        
-        DenseMap<uint32_t, Value*>::iterator predV = 
-                                            localAvail[*PI]->table.find(valno);
+
+        DenseMap<uint32_t, Value*>::iterator predV =
+                                            localAvail[*PI]->table.find(ValNo);
         if (predV == localAvail[*PI]->table.end()) {
           PREPred = *PI;
-          numWithout++;
+          NumWithout++;
         } else if (predV->second == CurInst) {
-          numWithout = 2;
+          NumWithout = 2;
         } else {
           predMap[*PI] = predV->second;
-          numWith++;
+          NumWith++;
         }
       }
-      
+
       // Don't do PRE when it might increase code size, i.e. when
       // we would need to insert instructions in more than one pred.
-      if (numWithout != 1 || numWith == 0)
+      if (NumWithout != 1 || NumWith == 0)
         continue;
-      
+
       // We can't do PRE safely on a critical edge, so instead we schedule
       // the edge to be split and perform the PRE the next time we iterate
       // on the function.
-      unsigned succNum = 0;
+      unsigned SuccNum = 0;
       for (unsigned i = 0, e = PREPred->getTerminator()->getNumSuccessors();
            i != e; ++i)
         if (PREPred->getTerminator()->getSuccessor(i) == CurrentBlock) {
-          succNum = i;
+          SuccNum = i;
           break;
         }
-        
-      if (isCriticalEdge(PREPred->getTerminator(), succNum)) {
-        toSplit.push_back(std::make_pair(PREPred->getTerminator(), succNum));
+
+      if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
+        toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
         continue;
       }
-      
+
       // Instantiate the expression the in predecessor that lacked it.
       // Because we are going top-down through the block, all value numbers
       // will be available in the predecessor by the time we need them.  Any
       // that weren't original present will have been instantiated earlier
       // in this loop.
-      Instruction* PREInstr = CurInst->clone();
+      Instruction *PREInstr = CurInst->clone();
       bool success = true;
       for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) {
         Value *Op = PREInstr->getOperand(i);
         if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
           continue;
-        
+
         if (Value *V = lookupNumber(PREPred, VN.lookup(Op))) {
           PREInstr->setOperand(i, V);
         } else {
@@ -1641,25 +1910,25 @@ bool GVN::performPRE(Function& F) {
           break;
         }
       }
-      
+
       // Fail out if we encounter an operand that is not available in
-      // the PRE predecessor.  This is typically because of loads which 
+      // the PRE predecessor.  This is typically because of loads which
       // are not value numbered precisely.
       if (!success) {
         delete PREInstr;
         DEBUG(verifyRemoved(PREInstr));
         continue;
       }
-      
+
       PREInstr->insertBefore(PREPred->getTerminator());
       PREInstr->setName(CurInst->getName() + ".pre");
       predMap[PREPred] = PREInstr;
-      VN.add(PREInstr, valno);
+      VN.add(PREInstr, ValNo);
       NumGVNPRE++;
-      
+
       // Update the availability map to include the new instruction.
-      localAvail[PREPred]->table.insert(std::make_pair(valno, PREInstr));
-      
+      localAvail[PREPred]->table.insert(std::make_pair(ValNo, PREInstr));
+
       // Create a PHI to make the value available in this block.
       PHINode* Phi = PHINode::Create(CurInst->getType(),
                                      CurInst->getName() + ".pre-phi",
@@ -1667,27 +1936,27 @@ bool GVN::performPRE(Function& F) {
       for (pred_iterator PI = pred_begin(CurrentBlock),
            PE = pred_end(CurrentBlock); PI != PE; ++PI)
         Phi->addIncoming(predMap[*PI], *PI);
-      
-      VN.add(Phi, valno);
-      localAvail[CurrentBlock]->table[valno] = Phi;
-      
+
+      VN.add(Phi, ValNo);
+      localAvail[CurrentBlock]->table[ValNo] = Phi;
+
       CurInst->replaceAllUsesWith(Phi);
       if (isa<PointerType>(Phi->getType()))
         MD->invalidateCachedPointerInfo(Phi);
       VN.erase(CurInst);
-      
-      DEBUG(cerr << "GVN PRE removed: " << *CurInst);
+
+      DEBUG(errs() << "GVN PRE removed: " << *CurInst << '\n');
       MD->removeInstruction(CurInst);
       CurInst->eraseFromParent();
       DEBUG(verifyRemoved(CurInst));
       Changed = true;
     }
   }
-  
+
   for (SmallVector<std::pair<TerminatorInst*, unsigned>, 4>::iterator
        I = toSplit.begin(), E = toSplit.end(); I != E; ++I)
     SplitCriticalEdge(I->first, I->second, this);
-  
+
   return Changed || toSplit.size();
 }
 
@@ -1705,25 +1974,24 @@ bool GVN::iterateOnFunction(Function &F) {
   }
 
   // Top-down walk of the dominator tree
-  bool changed = false;
+  bool Changed = false;
 #if 0
   // Needed for value numbering with phi construction to work.
   ReversePostOrderTraversal<Function*> RPOT(&F);
   for (ReversePostOrderTraversal<Function*>::rpo_iterator RI = RPOT.begin(),
        RE = RPOT.end(); RI != RE; ++RI)
-    changed |= processBlock(*RI);
+    Changed |= processBlock(*RI);
 #else
   for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
        DE = df_end(DT->getRootNode()); DI != DE; ++DI)
-    changed |= processBlock(DI->getBlock());
+    Changed |= processBlock(DI->getBlock());
 #endif
 
-  return changed;
+  return Changed;
 }
 
 void GVN::cleanupGlobalSets() {
   VN.clear();
-  phiMap.clear();
 
   for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator
        I = localAvail.begin(), E = localAvail.end(); I != E; ++I)
@@ -1736,18 +2004,6 @@ void GVN::cleanupGlobalSets() {
 void GVN::verifyRemoved(const Instruction *Inst) const {
   VN.verifyRemoved(Inst);
 
-  // Walk through the PHI map to make sure the instruction isn't hiding in there
-  // somewhere.
-  for (PhiMapType::iterator
-         I = phiMap.begin(), E = phiMap.end(); I != E; ++I) {
-    assert(I->first != Inst && "Inst is still a key in PHI map!");
-
-    for (SmallPtrSet<Instruction*, 4>::iterator
-           II = I->second.begin(), IE = I->second.end(); II != IE; ++II) {
-      assert(*II != Inst && "Inst is still a value in PHI map!");
-    }
-  }
-
   // Walk through the value number scope to make sure the instruction isn't
   // ferreted away in it.
   for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 88cf60e..e2d9e0b 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -51,11 +51,11 @@
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Support/CFG.h"
-#include "llvm/Support/Compiler.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Support/CommandLine.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/STLExtras.h"
@@ -67,7 +67,7 @@ STATISTIC(NumReplaced, "Number of exit values replaced");
 STATISTIC(NumLFTR    , "Number of loop exit tests replaced");
 
 namespace {
-  class VISIBILITY_HIDDEN IndVarSimplify : public LoopPass {
+  class IndVarSimplify : public LoopPass {
     IVUsers         *IU;
     LoopInfo        *LI;
     ScalarEvolution *SE;
@@ -75,30 +75,30 @@ namespace {
     bool Changed;
   public:
 
-   static char ID; // Pass identification, replacement for typeid
-   IndVarSimplify() : LoopPass(&ID) {}
-
-   virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
-
-   virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-     AU.addRequired<DominatorTree>();
-     AU.addRequired<ScalarEvolution>();
-     AU.addRequiredID(LoopSimplifyID);
-     AU.addRequired<LoopInfo>();
-     AU.addRequired<IVUsers>();
-     AU.addRequiredID(LCSSAID);
-     AU.addPreserved<ScalarEvolution>();
-     AU.addPreservedID(LoopSimplifyID);
-     AU.addPreserved<IVUsers>();
-     AU.addPreservedID(LCSSAID);
-     AU.setPreservesCFG();
-   }
+    static char ID; // Pass identification, replacement for typeid
+    IndVarSimplify() : LoopPass(&ID) {}
+
+    virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<DominatorTree>();
+      AU.addRequired<LoopInfo>();
+      AU.addRequired<ScalarEvolution>();
+      AU.addRequiredID(LoopSimplifyID);
+      AU.addRequiredID(LCSSAID);
+      AU.addRequired<IVUsers>();
+      AU.addPreserved<ScalarEvolution>();
+      AU.addPreservedID(LoopSimplifyID);
+      AU.addPreservedID(LCSSAID);
+      AU.addPreserved<IVUsers>();
+      AU.setPreservesCFG();
+    }
 
   private:
 
     void RewriteNonIntegerIVs(Loop *L);
 
-    ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV* BackedgeTakenCount,
+    ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
                                    Value *IndVar,
                                    BasicBlock *ExitingBlock,
                                    BranchInst *BI,
@@ -129,7 +129,7 @@ Pass *llvm::createIndVarSimplifyPass() {
 /// SCEV analysis can determine a loop-invariant trip count of the loop, which
 /// is actually a much broader range than just linear tests.
 ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
-                                   const SCEV* BackedgeTakenCount,
+                                   const SCEV *BackedgeTakenCount,
                                    Value *IndVar,
                                    BasicBlock *ExitingBlock,
                                    BranchInst *BI,
@@ -138,13 +138,13 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
   // against the preincremented value, otherwise we prefer to compare against
   // the post-incremented value.
   Value *CmpIndVar;
-  const SCEV* RHS = BackedgeTakenCount;
+  const SCEV *RHS = BackedgeTakenCount;
   if (ExitingBlock == L->getLoopLatch()) {
     // Add one to the "backedge-taken" count to get the trip count.
     // If this addition may overflow, we have to be more pessimistic and
     // cast the induction variable before doing the add.
-    const SCEV* Zero = SE->getIntegerSCEV(0, BackedgeTakenCount->getType());
-    const SCEV* N =
+    const SCEV *Zero = SE->getIntegerSCEV(0, BackedgeTakenCount->getType());
+    const SCEV *N =
       SE->getAddExpr(BackedgeTakenCount,
                      SE->getIntegerSCEV(1, BackedgeTakenCount->getType()));
     if ((isa<SCEVConstant>(N) && !N->isZero()) ||
@@ -182,13 +182,13 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
   else
     Opcode = ICmpInst::ICMP_EQ;
 
-  DOUT << "INDVARS: Rewriting loop exit condition to:\n"
-       << "      LHS:" << *CmpIndVar // includes a newline
-       << "       op:\t"
-       << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
-       << "      RHS:\t" << *RHS << "\n";
+  DEBUG(errs() << "INDVARS: Rewriting loop exit condition to:\n"
+               << "      LHS:" << *CmpIndVar << '\n'
+               << "       op:\t"
+               << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
+               << "      RHS:\t" << *RHS << "\n");
 
-  ICmpInst *Cond = new ICmpInst(Opcode, CmpIndVar, ExitCnt, "exitcond", BI);
+  ICmpInst *Cond = new ICmpInst(BI, Opcode, CmpIndVar, ExitCnt, "exitcond");
 
   Instruction *OrigCond = cast<Instruction>(BI->getCondition());
   // It's tempting to use replaceAllUsesWith here to fully replace the old
@@ -264,7 +264,7 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L,
         // Okay, this instruction has a user outside of the current loop
         // and varies predictably *inside* the loop.  Evaluate the value it
         // contains when the loop exits, if possible.
-        const SCEV* ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
+        const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
         if (!ExitValue->isLoopInvariant(L))
           continue;
 
@@ -273,25 +273,23 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L,
 
         Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);
 
-        DOUT << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal
-             << "  LoopVal = " << *Inst << "\n";
+        DEBUG(errs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n'
+                     << "  LoopVal = " << *Inst << "\n");
 
         PN->setIncomingValue(i, ExitVal);
 
         // If this instruction is dead now, delete it.
         RecursivelyDeleteTriviallyDeadInstructions(Inst);
 
-        // If we're inserting code into the exit block rather than the
-        // preheader, we can (and have to) remove the PHI entirely.
-        // This is safe, because the NewVal won't be variant
-        // in the loop, so we don't need an LCSSA phi node anymore.
-        if (ExitBlocks.size() == 1) {
+        if (NumPreds == 1) {
+          // Completely replace a single-pred PHI. This is safe, because the
+          // NewVal won't be variant in the loop, so we don't need an LCSSA phi
+          // node anymore.
           PN->replaceAllUsesWith(ExitVal);
           RecursivelyDeleteTriviallyDeadInstructions(PN);
-          break;
         }
       }
-      if (ExitBlocks.size() != 1) {
+      if (NumPreds != 1) {
         // Clone the PHI and delete the original one. This lets IVUsers and
         // any other maps purge the original user from their records.
         PHINode *NewPN = PN->clone();
@@ -339,7 +337,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
   RewriteNonIntegerIVs(L);
 
   BasicBlock *ExitingBlock = L->getExitingBlock(); // may be null
-  const SCEV* BackedgeTakenCount = SE->getBackedgeTakenCount(L);
+  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
 
   // Create a rewriter object which we'll use to transform the code with.
   SCEVExpander Rewriter(*SE);
@@ -367,14 +365,14 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
       NeedCannIV = true;
   }
   for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
-    const SCEV* Stride = IU->StrideOrder[i];
+    const SCEV *Stride = IU->StrideOrder[i];
     const Type *Ty = SE->getEffectiveSCEVType(Stride->getType());
     if (!LargestType ||
         SE->getTypeSizeInBits(Ty) >
           SE->getTypeSizeInBits(LargestType))
       LargestType = Ty;
 
-    std::map<const SCEV*, IVUsersOfOneStride *>::iterator SI =
+    std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
       IU->IVUsesByStride.find(IU->StrideOrder[i]);
     assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
 
@@ -403,7 +401,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
 
     ++NumInserted;
     Changed = true;
-    DOUT << "INDVARS: New CanIV: " << *IndVar;
+    DEBUG(errs() << "INDVARS: New CanIV: " << *IndVar << '\n');
 
     // Now that the official induction variable is established, reinsert
     // the old canonical-looking variable after it so that the IR remains
@@ -458,9 +456,9 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType,
   // the need for the code evaluation methods to insert induction variables
   // of different sizes.
   for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
-    const SCEV* Stride = IU->StrideOrder[i];
+    const SCEV *Stride = IU->StrideOrder[i];
 
-    std::map<const SCEV*, IVUsersOfOneStride *>::iterator SI =
+    std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
       IU->IVUsesByStride.find(IU->StrideOrder[i]);
     assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
     ilist<IVStrideUse> &List = SI->second->Users;
@@ -471,7 +469,7 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType,
       Instruction *User = UI->getUser();
 
       // Compute the final addrec to expand into code.
-      const SCEV* AR = IU->getReplacementExpr(*UI);
+      const SCEV *AR = IU->getReplacementExpr(*UI);
 
       // FIXME: It is an extremely bad idea to indvar substitute anything more
       // complex than affine induction variables.  Doing so will put expensive
@@ -508,8 +506,8 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType,
         NewVal->takeName(Op);
       User->replaceUsesOfWith(Op, NewVal);
       UI->setOperandValToReplace(NewVal);
-      DOUT << "INDVARS: Rewrote IV '" << *AR << "' " << *Op
-           << "   into = " << *NewVal << "\n";
+      DEBUG(errs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n'
+                   << "   into = " << *NewVal << "\n");
       ++NumRemoved;
       Changed = true;
 
@@ -546,8 +544,19 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
     // New instructions were inserted at the end of the preheader.
     if (isa<PHINode>(I))
       break;
-    if (I->isTrapping())
+    // Don't move instructions which might have side effects, since the side
+    // effects need to complete before instructions inside the loop.  Also
+    // don't move instructions which might read memory, since the loop may
+    // modify memory. Note that it's okay if the instruction might have
+    // undefined behavior: LoopSimplify guarantees that the preheader
+    // dominates the exit block.
+    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
       continue;
+    // Don't sink static AllocaInsts out of the entry block, which would
+    // turn them into dynamic allocas!
+    if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
+      if (AI->isStaticAlloca())
+        continue;
     // Determine if there is a use in or before the loop (direct or
     // otherwise).
     bool UsedInLoop = false;
@@ -630,7 +639,8 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) {
   // Check incoming value.
   ConstantFP *InitValue = dyn_cast<ConstantFP>(PH->getIncomingValue(IncomingEdge));
   if (!InitValue) return;
-  uint64_t newInitValue = Type::Int32Ty->getPrimitiveSizeInBits();
+  uint64_t newInitValue =
+              Type::getInt32Ty(PH->getContext())->getPrimitiveSizeInBits();
   if (!convertToInt(InitValue->getValueAPF(), &newInitValue))
     return;
 
@@ -646,7 +656,8 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) {
     IncrVIndex = 0;
   IncrValue = dyn_cast<ConstantFP>(Incr->getOperand(IncrVIndex));
   if (!IncrValue) return;
-  uint64_t newIncrValue = Type::Int32Ty->getPrimitiveSizeInBits();
+  uint64_t newIncrValue =
+              Type::getInt32Ty(PH->getContext())->getPrimitiveSizeInBits();
   if (!convertToInt(IncrValue->getValueAPF(), &newIncrValue))
     return;
 
@@ -677,7 +688,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) {
     EVIndex = 0;
   EV = dyn_cast<ConstantFP>(EC->getOperand(EVIndex));
   if (!EV) return;
-  uint64_t intEV = Type::Int32Ty->getPrimitiveSizeInBits();
+  uint64_t intEV = Type::getInt32Ty(PH->getContext())->getPrimitiveSizeInBits();
   if (!convertToInt(EV->getValueAPF(), &intEV))
     return;
 
@@ -710,24 +721,26 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) {
   if (NewPred == CmpInst::BAD_ICMP_PREDICATE) return;
 
   // Insert new integer induction variable.
-  PHINode *NewPHI = PHINode::Create(Type::Int32Ty,
+  PHINode *NewPHI = PHINode::Create(Type::getInt32Ty(PH->getContext()),
                                     PH->getName()+".int", PH);
-  NewPHI->addIncoming(Context->getConstantInt(Type::Int32Ty, newInitValue),
+  NewPHI->addIncoming(ConstantInt::get(Type::getInt32Ty(PH->getContext()),
+                                       newInitValue),
                       PH->getIncomingBlock(IncomingEdge));
 
   Value *NewAdd = BinaryOperator::CreateAdd(NewPHI,
-                                          Context->getConstantInt(Type::Int32Ty,
+                           ConstantInt::get(Type::getInt32Ty(PH->getContext()),
                                                              newIncrValue),
                                             Incr->getName()+".int", Incr);
   NewPHI->addIncoming(NewAdd, PH->getIncomingBlock(BackEdge));
 
   // The back edge is edge 1 of newPHI, whatever it may have been in the
   // original PHI.
-  ConstantInt *NewEV = Context->getConstantInt(Type::Int32Ty, intEV);
+  ConstantInt *NewEV = ConstantInt::get(Type::getInt32Ty(PH->getContext()),
+                                        intEV);
   Value *LHS = (EVIndex == 1 ? NewPHI->getIncomingValue(1) : NewEV);
   Value *RHS = (EVIndex == 1 ? NewEV : NewPHI->getIncomingValue(1));
-  ICmpInst *NewEC = new ICmpInst(NewPred, LHS, RHS, EC->getNameStart(),
-                                 EC->getParent()->getTerminator());
+  ICmpInst *NewEC = new ICmpInst(EC->getParent()->getTerminator(),
+                                 NewPred, LHS, RHS, EC->getName());
 
   // In the following deltions, PH may become dead and may be deleted.
   // Use a WeakVH to observe whether this happens.
@@ -739,7 +752,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) {
   RecursivelyDeleteTriviallyDeadInstructions(EC);
 
   // Delete old, floating point, increment instruction.
-  Incr->replaceAllUsesWith(Context->getUndef(Incr->getType()));
+  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
   RecursivelyDeleteTriviallyDeadInstructions(Incr);
 
   // Replace floating induction variable, if it isn't already deleted.
diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp
index 59fbd39..7c96c49 100644
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -40,7 +40,9 @@
 #include "llvm/Pass.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/GlobalVariable.h"
+#include "llvm/Operator.h"
 #include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/MallocHelper.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -48,11 +50,13 @@
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/ConstantRange.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/IRBuilder.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/PatternMatch.h"
-#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -60,7 +64,6 @@
 #include "llvm/ADT/STLExtras.h"
 #include <algorithm>
 #include <climits>
-#include <sstream>
 using namespace llvm;
 using namespace llvm::PatternMatch;
 
@@ -71,29 +74,49 @@ STATISTIC(NumDeadStore, "Number of dead stores eliminated");
 STATISTIC(NumSunkInst , "Number of instructions sunk");
 
 namespace {
-  class VISIBILITY_HIDDEN InstCombiner
-    : public FunctionPass,
-      public InstVisitor<InstCombiner, Instruction*> {
-    // Worklist of all of the instructions that need to be simplified.
+  /// InstCombineWorklist - This is the worklist management logic for
+  /// InstCombine.
+  class InstCombineWorklist {
     SmallVector<Instruction*, 256> Worklist;
     DenseMap<Instruction*, unsigned> WorklistMap;
-    TargetData *TD;
-    bool MustPreserveLCSSA;
+    
+    void operator=(const InstCombineWorklist&RHS);   // DO NOT IMPLEMENT
+    InstCombineWorklist(const InstCombineWorklist&); // DO NOT IMPLEMENT
   public:
-    static char ID; // Pass identification, replacement for typeid
-    InstCombiner() : FunctionPass(&ID) {}
-
-    LLVMContext* getContext() { return Context; }
-
-    /// AddToWorkList - Add the specified instruction to the worklist if it
-    /// isn't already in it.
-    void AddToWorkList(Instruction *I) {
-      if (WorklistMap.insert(std::make_pair(I, Worklist.size())).second)
+    InstCombineWorklist() {}
+    
+    bool isEmpty() const { return Worklist.empty(); }
+    
+    /// Add - Add the specified instruction to the worklist if it isn't already
+    /// in it.
+    void Add(Instruction *I) {
+      if (WorklistMap.insert(std::make_pair(I, Worklist.size())).second) {
+        DEBUG(errs() << "IC: ADD: " << *I << '\n');
         Worklist.push_back(I);
+      }
+    }
+    
+    void AddValue(Value *V) {
+      if (Instruction *I = dyn_cast<Instruction>(V))
+        Add(I);
     }
     
-    // RemoveFromWorkList - remove I from the worklist if it exists.
-    void RemoveFromWorkList(Instruction *I) {
+    /// AddInitialGroup - Add the specified batch of stuff in reverse order.
+    /// which should only be done when the worklist is empty and when the group
+    /// has no duplicates.
+    void AddInitialGroup(Instruction *const *List, unsigned NumEntries) {
+      assert(Worklist.empty() && "Worklist must be empty to add initial group");
+      Worklist.reserve(NumEntries+16);
+      DEBUG(errs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n");
+      for (; NumEntries; --NumEntries) {
+        Instruction *I = List[NumEntries-1];
+        WorklistMap.insert(std::make_pair(I, Worklist.size()));
+        Worklist.push_back(I);
+      }
+    }
+    
+    // Remove - remove I from the worklist if it exists.
+    void Remove(Instruction *I) {
       DenseMap<Instruction*, unsigned>::iterator It = WorklistMap.find(I);
       if (It == WorklistMap.end()) return; // Not in worklist.
       
@@ -103,51 +126,74 @@ namespace {
       WorklistMap.erase(It);
     }
     
-    Instruction *RemoveOneFromWorkList() {
+    Instruction *RemoveOne() {
       Instruction *I = Worklist.back();
       Worklist.pop_back();
       WorklistMap.erase(I);
       return I;
     }
 
-    
     /// AddUsersToWorkList - When an instruction is simplified, add all users of
     /// the instruction to the work lists because they might get more simplified
     /// now.
     ///
-    void AddUsersToWorkList(Value &I) {
+    void AddUsersToWorkList(Instruction &I) {
       for (Value::use_iterator UI = I.use_begin(), UE = I.use_end();
            UI != UE; ++UI)
-        AddToWorkList(cast<Instruction>(*UI));
-    }
-
-    /// AddUsesToWorkList - When an instruction is simplified, add operands to
-    /// the work lists because they might get more simplified now.
-    ///
-    void AddUsesToWorkList(Instruction &I) {
-      for (User::op_iterator i = I.op_begin(), e = I.op_end(); i != e; ++i)
-        if (Instruction *Op = dyn_cast<Instruction>(*i))
-          AddToWorkList(Op);
+        Add(cast<Instruction>(*UI));
     }
     
-    /// AddSoonDeadInstToWorklist - The specified instruction is about to become
-    /// dead.  Add all of its operands to the worklist, turning them into
-    /// undef's to reduce the number of uses of those instructions.
-    ///
-    /// Return the specified operand before it is turned into an undef.
-    ///
-    Value *AddSoonDeadInstToWorklist(Instruction &I, unsigned op) {
-      Value *R = I.getOperand(op);
-      
-      for (User::op_iterator i = I.op_begin(), e = I.op_end(); i != e; ++i)
-        if (Instruction *Op = dyn_cast<Instruction>(*i)) {
-          AddToWorkList(Op);
-          // Set the operand to undef to drop the use.
-          *i = Context->getUndef(Op->getType());
-        }
+    
+    /// Zap - check that the worklist is empty and nuke the backing store for
+    /// the map if it is large.
+    void Zap() {
+      assert(WorklistMap.empty() && "Worklist empty, but map not?");
       
-      return R;
+      // Do an explicit clear, this shrinks the map if needed.
+      WorklistMap.clear();
     }
+  };
+} // end anonymous namespace.
+
+
+namespace {
+  /// InstCombineIRInserter - This is an IRBuilder insertion helper that works
+  /// just like the normal insertion helper, but also adds any new instructions
+  /// to the instcombine worklist.
+  class InstCombineIRInserter : public IRBuilderDefaultInserter<true> {
+    InstCombineWorklist &Worklist;
+  public:
+    InstCombineIRInserter(InstCombineWorklist &WL) : Worklist(WL) {}
+    
+    void InsertHelper(Instruction *I, const Twine &Name,
+                      BasicBlock *BB, BasicBlock::iterator InsertPt) const {
+      IRBuilderDefaultInserter<true>::InsertHelper(I, Name, BB, InsertPt);
+      Worklist.Add(I);
+    }
+  };
+} // end anonymous namespace
+
+
+namespace {
+  class InstCombiner : public FunctionPass,
+                       public InstVisitor<InstCombiner, Instruction*> {
+    TargetData *TD;
+    bool MustPreserveLCSSA;
+    bool MadeIRChange;
+  public:
+    /// Worklist - All of the instructions that need to be simplified.
+    InstCombineWorklist Worklist;
+
+    /// Builder - This is an IRBuilder that automatically inserts new
+    /// instructions into the worklist when they are created.
+    typedef IRBuilder<true, ConstantFolder, InstCombineIRInserter> BuilderTy;
+    BuilderTy *Builder;
+        
+    static char ID; // Pass identification, replacement for typeid
+    InstCombiner() : FunctionPass(&ID), TD(0), Builder(0) {}
+
+    LLVMContext *Context;
+    LLVMContext *getContext() const { return Context; }
 
   public:
     virtual bool runOnFunction(Function &F);
@@ -155,12 +201,11 @@ namespace {
     bool DoOneIteration(Function &F, unsigned ItNum);
 
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.addRequired<TargetData>();
       AU.addPreservedID(LCSSAID);
       AU.setPreservesCFG();
     }
 
-    TargetData &getTargetData() const { return *TD; }
+    TargetData *getTargetData() const { return TD; }
 
     // Visitation implementation - Implement instruction combining for different
     // instruction types.  The semantics are as follows:
@@ -187,8 +232,10 @@ namespace {
     Instruction *visitSDiv(BinaryOperator &I);
     Instruction *visitFDiv(BinaryOperator &I);
     Instruction *FoldAndOfICmps(Instruction &I, ICmpInst *LHS, ICmpInst *RHS);
+    Instruction *FoldAndOfFCmps(Instruction &I, FCmpInst *LHS, FCmpInst *RHS);
     Instruction *visitAnd(BinaryOperator &I);
     Instruction *FoldOrOfICmps(Instruction &I, ICmpInst *LHS, ICmpInst *RHS);
+    Instruction *FoldOrOfFCmps(Instruction &I, FCmpInst *LHS, FCmpInst *RHS);
     Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op,
                                      Value *A, Value *B, Value *C);
     Instruction *visitOr (BinaryOperator &I);
@@ -208,7 +255,7 @@ namespace {
     Instruction *FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
                                 ConstantInt *DivRHS);
 
-    Instruction *FoldGEPICmp(User *GEPLHS, Value *RHS,
+    Instruction *FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
                              ICmpInst::Predicate Cond, Instruction &I);
     Instruction *FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
                                      BinaryOperator &I);
@@ -269,30 +316,10 @@ namespace {
              "New instruction already inserted into a basic block!");
       BasicBlock *BB = Old.getParent();
       BB->getInstList().insert(&Old, New);  // Insert inst
-      AddToWorkList(New);
+      Worklist.Add(New);
       return New;
     }
-
-    /// InsertCastBefore - Insert a cast of V to TY before the instruction POS.
-    /// This also adds the cast to the worklist.  Finally, this returns the
-    /// cast.
-    Value *InsertCastBefore(Instruction::CastOps opc, Value *V, const Type *Ty,
-                            Instruction &Pos) {
-      if (V->getType() == Ty) return V;
-
-      if (Constant *CV = dyn_cast<Constant>(V))
-        return Context->getConstantExprCast(opc, CV, Ty);
-      
-      Instruction *C = CastInst::Create(opc, V, Ty, V->getName(), &Pos);
-      AddToWorkList(C);
-      return C;
-    }
         
-    Value *InsertBitCastBefore(Value *V, const Type *Ty, Instruction &Pos) {
-      return InsertCastBefore(Instruction::BitCast, V, Ty, Pos);
-    }
-
-
     // ReplaceInstUsesWith - This method is to be used when an instruction is
     // found to be dead, replacable with another preexisting expression.  Here
     // we add all uses of I to the worklist, replace all uses of I with the new
@@ -300,16 +327,15 @@ namespace {
     // modified.
     //
     Instruction *ReplaceInstUsesWith(Instruction &I, Value *V) {
-      AddUsersToWorkList(I);         // Add all modified instrs to worklist
-      if (&I != V) {
-        I.replaceAllUsesWith(V);
-        return &I;
-      } else {
-        // If we are replacing the instruction with itself, this must be in a
-        // segment of unreachable code, so just clobber the instruction.
-        I.replaceAllUsesWith(Context->getUndef(I.getType()));
-        return &I;
-      }
+      Worklist.AddUsersToWorkList(I);   // Add all modified instrs to worklist.
+      
+      // If we are replacing the instruction with itself, this must be in a
+      // segment of unreachable code, so just clobber the instruction.
+      if (&I == V) 
+        V = UndefValue::get(I.getType());
+        
+      I.replaceAllUsesWith(V);
+      return &I;
     }
 
     // EraseInstFromFunction - When dealing with an instruction that has side
@@ -317,10 +343,19 @@ namespace {
     // instruction.  Instead, visit methods should return the value returned by
     // this function.
     Instruction *EraseInstFromFunction(Instruction &I) {
+      DEBUG(errs() << "IC: ERASE " << I << '\n');
+
       assert(I.use_empty() && "Cannot erase instruction that is used!");
-      AddUsesToWorkList(I);
-      RemoveFromWorkList(&I);
+      // Make sure that we reprocess all operands now that we reduced their
+      // use counts.
+      if (I.getNumOperands() < 8) {
+        for (User::op_iterator i = I.op_begin(), e = I.op_end(); i != e; ++i)
+          if (Instruction *Op = dyn_cast<Instruction>(*i))
+            Worklist.Add(Op);
+      }
+      Worklist.Remove(&I);
       I.eraseFromParent();
+      MadeIRChange = true;
       return 0;  // Don't do anything with FI
     }
         
@@ -364,10 +399,15 @@ namespace {
     Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
                                       APInt& UndefElts, unsigned Depth = 0);
       
-    // FoldOpIntoPhi - Given a binary operator or cast instruction which has a
-    // PHI node as operand #0, see if we can fold the instruction into the PHI
-    // (which is only possible if all operands to the PHI are constants).
-    Instruction *FoldOpIntoPhi(Instruction &I);
+    // FoldOpIntoPhi - Given a binary operator, cast instruction, or select
+    // which has a PHI node as operand #0, see if we can fold the instruction
+    // into the PHI (which is only possible if all operands to the PHI are
+    // constants).
+    //
+    // If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms
+    // that would normally be unprofitable because they strongly encourage jump
+    // threading.
+    Instruction *FoldOpIntoPhi(Instruction &I, bool AllowAggressive = false);
 
     // FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary"
     // operator and they all are only used by the PHI, PHI together their
@@ -399,7 +439,7 @@ namespace {
                                         unsigned PrefAlign = 0);
 
   };
-}
+} // end anonymous namespace
 
 char InstCombiner::ID = 0;
 static RegisterPass<InstCombiner>
@@ -409,7 +449,8 @@ X("instcombine", "Combine redundant instructions");
 //   0 -> undef, 1 -> Const, 2 -> Other, 3 -> Arg, 3 -> Unary, 4 -> OtherInst
 static unsigned getComplexity(Value *V) {
   if (isa<Instruction>(V)) {
-    if (BinaryOperator::isNeg(V) || BinaryOperator::isFNeg(V) ||
+    if (BinaryOperator::isNeg(V) ||
+        BinaryOperator::isFNeg(V) ||
         BinaryOperator::isNot(V))
       return 3;
     return 4;
@@ -429,7 +470,7 @@ static bool isOnlyUse(Value *V) {
 static const Type *getPromotedType(const Type *Ty) {
   if (const IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
     if (ITy->getBitWidth() < 32)
-      return Type::Int32Ty;
+      return Type::getInt32Ty(Ty->getContext());
   }
   return Ty;
 }
@@ -438,29 +479,12 @@ static const Type *getPromotedType(const Type *Ty) {
 /// expression bitcast, or a GetElementPtrInst with all zero indices, return the
 /// operand value, otherwise return null.
 static Value *getBitCastOperand(Value *V) {
-  if (BitCastInst *I = dyn_cast<BitCastInst>(V))
-    // BitCastInst?
-    return I->getOperand(0);
-  else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
-    // GetElementPtrInst?
-    if (GEP->hasAllZeroIndices())
-      return GEP->getOperand(0);
-  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
-    if (CE->getOpcode() == Instruction::BitCast)
-      // BitCast ConstantExp?
-      return CE->getOperand(0);
-    else if (CE->getOpcode() == Instruction::GetElementPtr) {
-      // GetElementPtr ConstantExp?
-      for (User::op_iterator I = CE->op_begin() + 1, E = CE->op_end();
-           I != E; ++I) {
-        ConstantInt *CI = dyn_cast<ConstantInt>(I);
-        if (!CI || !CI->isZero())
-          // Any non-zero indices? Not cast-like.
-          return 0;
-      }
-      // All-zero indices? This is just like casting.
-      return CE->getOperand(0);
-    }
+  if (Operator *O = dyn_cast<Operator>(V)) {
+    if (O->getOpcode() == Instruction::BitCast)
+      return O->getOperand(0);
+    if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
+      if (GEP->hasAllZeroIndices())
+        return GEP->getPointerOperand();
   }
   return 0;
 }
@@ -474,7 +498,7 @@ isEliminableCastPair(
   const Type *DstTy,     ///< The target type for the second cast instruction
   TargetData *TD         ///< The target data for pointer size
 ) {
-  
+
   const Type *SrcTy = CI->getOperand(0)->getType();   // A from above
   const Type *MidTy = CI->getType();                  // B from above
 
@@ -483,12 +507,15 @@ isEliminableCastPair(
   Instruction::CastOps secondOp = Instruction::CastOps(opcode);
 
   unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
-                                                DstTy, TD->getIntPtrType());
+                                                DstTy,
+                                  TD ? TD->getIntPtrType(CI->getContext()) : 0);
   
   // We don't want to form an inttoptr or ptrtoint that converts to an integer
   // type that differs from the pointer size.
-  if ((Res == Instruction::IntToPtr && SrcTy != TD->getIntPtrType()) ||
-      (Res == Instruction::PtrToInt && DstTy != TD->getIntPtrType()))
+  if ((Res == Instruction::IntToPtr &&
+          (!TD || SrcTy != TD->getIntPtrType(CI->getContext()))) ||
+      (Res == Instruction::PtrToInt &&
+          (!TD || DstTy != TD->getIntPtrType(CI->getContext()))))
     Res = 0;
   
   return Instruction::CastOps(Res);
@@ -503,7 +530,7 @@ static bool ValueRequiresCast(Instruction::CastOps opcode, const Value *V,
   
   // If this is another cast that can be eliminated, it isn't codegen either.
   if (const CastInst *CI = dyn_cast<CastInst>(V))
-    if (isEliminableCastPair(CI, opcode, Ty, TD)) 
+    if (isEliminableCastPair(CI, opcode, Ty, TD))
       return false;
   return true;
 }
@@ -528,7 +555,7 @@ bool InstCombiner::SimplifyCommutative(BinaryOperator &I) {
   if (BinaryOperator *Op = dyn_cast<BinaryOperator>(I.getOperand(0)))
     if (Op->getOpcode() == Opcode && isa<Constant>(Op->getOperand(1))) {
       if (isa<Constant>(I.getOperand(1))) {
-        Constant *Folded = Context->getConstantExpr(I.getOpcode(),
+        Constant *Folded = ConstantExpr::get(I.getOpcode(),
                                              cast<Constant>(I.getOperand(1)),
                                              cast<Constant>(Op->getOperand(1)));
         I.setOperand(0, Op->getOperand(0));
@@ -541,11 +568,11 @@ bool InstCombiner::SimplifyCommutative(BinaryOperator &I) {
           Constant *C2 = cast<Constant>(Op1->getOperand(1));
 
           // Fold (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2))
-          Constant *Folded = Context->getConstantExpr(I.getOpcode(), C1, C2);
+          Constant *Folded = ConstantExpr::get(I.getOpcode(), C1, C2);
           Instruction *New = BinaryOperator::Create(Opcode, Op->getOperand(0),
                                                     Op1->getOperand(0),
                                                     Op1->getName(), &I);
-          AddToWorkList(New);
+          Worklist.Add(New);
           I.setOperand(0, New);
           I.setOperand(1, Folded);
           return true;
@@ -568,17 +595,17 @@ bool InstCombiner::SimplifyCompare(CmpInst &I) {
 // dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction
 // if the LHS is a constant zero (which is the 'negate' form).
 //
-static inline Value *dyn_castNegVal(Value *V, LLVMContext* Context) {
+static inline Value *dyn_castNegVal(Value *V) {
   if (BinaryOperator::isNeg(V))
     return BinaryOperator::getNegArgument(V);
 
   // Constants can be considered to be negated values if they can be folded.
   if (ConstantInt *C = dyn_cast<ConstantInt>(V))
-    return Context->getConstantExprNeg(C);
+    return ConstantExpr::getNeg(C);
 
   if (ConstantVector *C = dyn_cast<ConstantVector>(V))
     if (C->getType()->getElementType()->isInteger())
-      return Context->getConstantExprNeg(C);
+      return ConstantExpr::getNeg(C);
 
   return 0;
 }
@@ -587,28 +614,28 @@ static inline Value *dyn_castNegVal(Value *V, LLVMContext* Context) {
 // instruction if the LHS is a constant negative zero (which is the 'negate'
 // form).
 //
-static inline Value *dyn_castFNegVal(Value *V, LLVMContext* Context) {
+static inline Value *dyn_castFNegVal(Value *V) {
   if (BinaryOperator::isFNeg(V))
     return BinaryOperator::getFNegArgument(V);
 
   // Constants can be considered to be negated values if they can be folded.
   if (ConstantFP *C = dyn_cast<ConstantFP>(V))
-    return Context->getConstantExprFNeg(C);
+    return ConstantExpr::getFNeg(C);
 
   if (ConstantVector *C = dyn_cast<ConstantVector>(V))
     if (C->getType()->getElementType()->isFloatingPoint())
-      return Context->getConstantExprFNeg(C);
+      return ConstantExpr::getFNeg(C);
 
   return 0;
 }
 
-static inline Value *dyn_castNotVal(Value *V, LLVMContext* Context) {
+static inline Value *dyn_castNotVal(Value *V) {
   if (BinaryOperator::isNot(V))
     return BinaryOperator::getNotArgument(V);
 
   // Constants can be considered to be not'ed values...
   if (ConstantInt *C = dyn_cast<ConstantInt>(V))
-    return Context->getConstantInt(~C->getValue());
+    return ConstantInt::get(C->getType(), ~C->getValue());
   return 0;
 }
 
@@ -617,8 +644,7 @@ static inline Value *dyn_castNotVal(Value *V, LLVMContext* Context) {
 // non-constant operand of the multiply, and set CST to point to the multiplier.
 // Otherwise, return null.
 //
-static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST,
-                                         LLVMContext* Context) {
+static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) {
   if (V->hasOneUse() && V->getType()->isInteger())
     if (Instruction *I = dyn_cast<Instruction>(V)) {
       if (I->getOpcode() == Instruction::Mul)
@@ -629,48 +655,27 @@ static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST,
           // The multiplier is really 1 << CST.
           uint32_t BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
           uint32_t CSTVal = CST->getLimitedValue(BitWidth);
-          CST = Context->getConstantInt(APInt(BitWidth, 1).shl(CSTVal));
+          CST = ConstantInt::get(V->getType()->getContext(),
+                                 APInt(BitWidth, 1).shl(CSTVal));
           return I->getOperand(0);
         }
     }
   return 0;
 }
 
-/// dyn_castGetElementPtr - If this is a getelementptr instruction or constant
-/// expression, return it.
-static User *dyn_castGetElementPtr(Value *V) {
-  if (isa<GetElementPtrInst>(V)) return cast<User>(V);
-  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
-    if (CE->getOpcode() == Instruction::GetElementPtr)
-      return cast<User>(V);
-  return false;
-}
-
-/// getOpcode - If this is an Instruction or a ConstantExpr, return the
-/// opcode value. Otherwise return UserOp1.
-static unsigned getOpcode(const Value *V) {
-  if (const Instruction *I = dyn_cast<Instruction>(V))
-    return I->getOpcode();
-  if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
-    return CE->getOpcode();
-  // Use UserOp1 to mean there's no opcode.
-  return Instruction::UserOp1;
-}
-
 /// AddOne - Add one to a ConstantInt
-static Constant *AddOne(Constant *C, LLVMContext* Context) {
-  return Context->getConstantExprAdd(C, 
-    Context->getConstantInt(C->getType(), 1));
+static Constant *AddOne(Constant *C) {
+  return ConstantExpr::getAdd(C, 
+    ConstantInt::get(C->getType(), 1));
 }
 /// SubOne - Subtract one from a ConstantInt
-static Constant *SubOne(ConstantInt *C, LLVMContext* Context) {
-  return Context->getConstantExprSub(C, 
-    Context->getConstantInt(C->getType(), 1));
+static Constant *SubOne(ConstantInt *C) {
+  return ConstantExpr::getSub(C, 
+    ConstantInt::get(C->getType(), 1));
 }
 /// MultiplyOverflows - True if the multiply can not be expressed in an int
 /// this size.
-static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign,
-                              LLVMContext* Context) {
+static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) {
   uint32_t W = C1->getBitWidth();
   APInt LHSExt = C1->getValue(), RHSExt = C2->getValue();
   if (sign) {
@@ -697,7 +702,7 @@ static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign,
 /// are any bits set in the constant that are not demanded.  If so, shrink the
 /// constant and return true.
 static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, 
-                                   APInt Demanded, LLVMContext* Context) {
+                                   APInt Demanded) {
   assert(I && "No instruction?");
   assert(OpNo < I->getNumOperands() && "Operand index too large");
 
@@ -712,7 +717,7 @@ static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
 
   // This instruction is producing bits that are not demanded. Shrink the RHS.
   Demanded &= OpC->getValue();
-  I->setOperand(OpNo, Context->getConstantInt(Demanded));
+  I->setOperand(OpNo, ConstantInt::get(OpC->getType(), Demanded));
   return true;
 }
 
@@ -784,7 +789,7 @@ bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask,
   Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask,
                                           KnownZero, KnownOne, Depth);
   if (NewVal == 0) return false;
-  U.set(NewVal);
+  U = NewVal;
   return true;
 }
 
@@ -844,7 +849,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
   if (DemandedMask == 0) {   // Not demanding any bits from V.
     if (isa<UndefValue>(V))
       return 0;
-    return Context->getUndef(VTy);
+    return UndefValue::get(VTy);
   }
   
   if (Depth == 6)        // Limit search depth.
@@ -886,7 +891,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
       
       // If all of the demanded bits in the inputs are known zeros, return zero.
       if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
-        return Context->getNullValue(VTy);
+        return Constant::getNullValue(VTy);
       
     } else if (I->getOpcode() == Instruction::Or) {
       // We can simplify (X|Y) -> X or Y in the user's context if we know that
@@ -955,10 +960,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     
     // If all of the demanded bits in the inputs are known zeros, return zero.
     if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
-      return Context->getNullValue(VTy);
+      return Constant::getNullValue(VTy);
       
     // If the RHS is a constant, see if we can simplify it.
-    if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero, Context))
+    if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero))
       return I;
       
     // Output known-1 bits are only known if set in both the LHS & RHS.
@@ -995,7 +1000,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
       return I->getOperand(1);
         
     // If the RHS is a constant, see if we can simplify it.
-    if (ShrinkDemandedConstant(I, 1, DemandedMask, Context))
+    if (ShrinkDemandedConstant(I, 1, DemandedMask))
       return I;
           
     // Output known-0 bits are only known if clear in both the LHS & RHS.
@@ -1030,7 +1035,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     // other, turn this into an *inclusive* or.
     //    e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
     if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) {
-      Instruction *Or =
+      Instruction *Or = 
         BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
                                  I->getName());
       return InsertNewInstBefore(Or, *I);
@@ -1043,7 +1048,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) { 
       // all known
       if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) {
-        Constant *AndC = Context->getConstantInt(~RHSKnownOne & DemandedMask);
+        Constant *AndC = Constant::getIntegerValue(VTy,
+                                                   ~RHSKnownOne & DemandedMask);
         Instruction *And = 
           BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp");
         return InsertNewInstBefore(And, *I);
@@ -1052,9 +1058,36 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     
     // If the RHS is a constant, see if we can simplify it.
     // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1.
-    if (ShrinkDemandedConstant(I, 1, DemandedMask, Context))
+    if (ShrinkDemandedConstant(I, 1, DemandedMask))
       return I;
     
+    // If our LHS is an 'and' and if it has one use, and if any of the bits we
+    // are flipping are known to be set, then the xor is just resetting those
+    // bits to zero.  We can just knock out bits from the 'and' and the 'xor',
+    // simplifying both of them.
+    if (Instruction *LHSInst = dyn_cast<Instruction>(I->getOperand(0)))
+      if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() &&
+          isa<ConstantInt>(I->getOperand(1)) &&
+          isa<ConstantInt>(LHSInst->getOperand(1)) &&
+          (LHSKnownOne & RHSKnownOne & DemandedMask) != 0) {
+        ConstantInt *AndRHS = cast<ConstantInt>(LHSInst->getOperand(1));
+        ConstantInt *XorRHS = cast<ConstantInt>(I->getOperand(1));
+        APInt NewMask = ~(LHSKnownOne & RHSKnownOne & DemandedMask);
+        
+        Constant *AndC =
+          ConstantInt::get(I->getType(), NewMask & AndRHS->getValue());
+        Instruction *NewAnd = 
+          BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp");
+        InsertNewInstBefore(NewAnd, *I);
+        
+        Constant *XorC =
+          ConstantInt::get(I->getType(), NewMask & XorRHS->getValue());
+        Instruction *NewXor =
+          BinaryOperator::CreateXor(NewAnd, XorC, "tmp");
+        return InsertNewInstBefore(NewXor, *I);
+      }
+          
+          
     RHSKnownZero = KnownZeroOut;
     RHSKnownOne  = KnownOneOut;
     break;
@@ -1069,8 +1102,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); 
     
     // If the operands are constants, see if we can simplify them.
-    if (ShrinkDemandedConstant(I, 1, DemandedMask, Context) ||
-        ShrinkDemandedConstant(I, 2, DemandedMask, Context))
+    if (ShrinkDemandedConstant(I, 1, DemandedMask) ||
+        ShrinkDemandedConstant(I, 2, DemandedMask))
       return I;
     
     // Only known if known in both the LHS and RHS.
@@ -1194,7 +1227,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
 
       // If the RHS of the add has bits set that can't affect the input, reduce
       // the constant.
-      if (ShrinkDemandedConstant(I, 1, InDemandedBits, Context))
+      if (ShrinkDemandedConstant(I, 1, InDemandedBits))
         return I;
       
       // Avoid excess work.
@@ -1415,10 +1448,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
           Instruction *NewVal;
           if (InputBit > ResultBit)
             NewVal = BinaryOperator::CreateLShr(I->getOperand(1),
-                    Context->getConstantInt(I->getType(), InputBit-ResultBit));
+                    ConstantInt::get(I->getType(), InputBit-ResultBit));
           else
             NewVal = BinaryOperator::CreateShl(I->getOperand(1),
-                    Context->getConstantInt(I->getType(), ResultBit-InputBit));
+                    ConstantInt::get(I->getType(), ResultBit-InputBit));
           NewVal->takeName(I);
           return InsertNewInstBefore(NewVal, *I);
         }
@@ -1434,12 +1467,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
   
   // If the client is only demanding bits that we know, return the known
   // constant.
-  if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) {
-    Constant *C = Context->getConstantInt(RHSKnownOne);
-    if (isa<PointerType>(V->getType()))
-      C = Context->getConstantExprIntToPtr(C, V->getType());
-    return C;
-  }
+  if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask)
+    return Constant::getIntegerValue(VTy, RHSKnownOne);
   return false;
 }
 
@@ -1465,13 +1494,13 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
     return 0;
   } else if (DemandedElts == 0) { // If nothing is demanded, provide undef.
     UndefElts = EltMask;
-    return Context->getUndef(V->getType());
+    return UndefValue::get(V->getType());
   }
 
   UndefElts = 0;
   if (ConstantVector *CP = dyn_cast<ConstantVector>(V)) {
     const Type *EltTy = cast<VectorType>(V->getType())->getElementType();
-    Constant *Undef = Context->getUndef(EltTy);
+    Constant *Undef = UndefValue::get(EltTy);
 
     std::vector<Constant*> Elts;
     for (unsigned i = 0; i != VWidth; ++i)
@@ -1486,7 +1515,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
       }
 
     // If we changed the constant, return it.
-    Constant *NewCP = Context->getConstantVector(Elts);
+    Constant *NewCP = ConstantVector::get(Elts);
     return NewCP != CP ? NewCP : 0;
   } else if (isa<ConstantAggregateZero>(V)) {
     // Simplify the CAZ to a ConstantVector where the non-demanded elements are
@@ -1498,15 +1527,15 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
       return 0;
     
     const Type *EltTy = cast<VectorType>(V->getType())->getElementType();
-    Constant *Zero = Context->getNullValue(EltTy);
-    Constant *Undef = Context->getUndef(EltTy);
+    Constant *Zero = Constant::getNullValue(EltTy);
+    Constant *Undef = UndefValue::get(EltTy);
     std::vector<Constant*> Elts;
     for (unsigned i = 0; i != VWidth; ++i) {
       Constant *Elt = DemandedElts[i] ? Zero : Undef;
       Elts.push_back(Elt);
     }
     UndefElts = DemandedElts ^ EltMask;
-    return Context->getConstantVector(Elts);
+    return ConstantVector::get(Elts);
   }
   
   // Limit search depth.
@@ -1553,8 +1582,10 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
     // If this is inserting an element that isn't demanded, remove this
     // insertelement.
     unsigned IdxNo = Idx->getZExtValue();
-    if (IdxNo >= VWidth || !DemandedElts[IdxNo])
-      return AddSoonDeadInstToWorklist(*I, 0);
+    if (IdxNo >= VWidth || !DemandedElts[IdxNo]) {
+      Worklist.Add(I);
+      return I->getOperand(0);
+    }
     
     // Otherwise, the element inserted overwrites whatever was there, so the
     // input demanded set is simpler than the output set.
@@ -1620,12 +1651,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
       std::vector<Constant*> Elts;
       for (unsigned i = 0; i < VWidth; ++i) {
         if (UndefElts[i])
-          Elts.push_back(Context->getUndef(Type::Int32Ty));
+          Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context)));
         else
-          Elts.push_back(Context->getConstantInt(Type::Int32Ty,
+          Elts.push_back(ConstantInt::get(Type::getInt32Ty(*Context),
                                           Shuffle->getMaskValue(i)));
       }
-      I->setOperand(2, Context->getConstantVector(Elts));
+      I->setOperand(2, ConstantVector::get(Elts));
       MadeChange = true;
     }
     break;
@@ -1678,7 +1709,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
     
     UndefElts = UndefElts2;
     if (VWidth > InVWidth) {
-      assert(0 && "Unimp");
+      llvm_unreachable("Unimp");
       // If there are more elements in the result than there are in the source,
       // then an output element is undef if the corresponding input element is
       // undef.
@@ -1686,7 +1717,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
         if (UndefElts2[OutIdx/Ratio])
           UndefElts.set(OutIdx);
     } else if (VWidth < InVWidth) {
-      assert(0 && "Unimp");
+      llvm_unreachable("Unimp");
       // If there are more elements in the source than there are in the result,
       // then a result element is undef if all of the corresponding input
       // elements are undef.
@@ -1752,11 +1783,13 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
           Value *LHS = II->getOperand(1);
           Value *RHS = II->getOperand(2);
           // Extract the element as scalars.
-          LHS = InsertNewInstBefore(new ExtractElementInst(LHS, 0U,"tmp"), *II);
-          RHS = InsertNewInstBefore(new ExtractElementInst(RHS, 0U,"tmp"), *II);
+          LHS = InsertNewInstBefore(ExtractElementInst::Create(LHS, 
+            ConstantInt::get(Type::getInt32Ty(*Context), 0U, false), "tmp"), *II);
+          RHS = InsertNewInstBefore(ExtractElementInst::Create(RHS,
+            ConstantInt::get(Type::getInt32Ty(*Context), 0U, false), "tmp"), *II);
           
           switch (II->getIntrinsicID()) {
-          default: assert(0 && "Case stmts out of sync!");
+          default: llvm_unreachable("Case stmts out of sync!");
           case Intrinsic::x86_sse_sub_ss:
           case Intrinsic::x86_sse2_sub_sd:
             TmpV = InsertNewInstBefore(BinaryOperator::CreateFSub(LHS, RHS,
@@ -1771,9 +1804,9 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
           
           Instruction *New =
             InsertElementInst::Create(
-              Context->getUndef(II->getType()), TmpV, 0U, II->getName());
+              UndefValue::get(II->getType()), TmpV,
+              ConstantInt::get(Type::getInt32Ty(*Context), 0U, false), II->getName());
           InsertNewInstBefore(New, *II);
-          AddSoonDeadInstToWorklist(*II, 0);
           return New;
         }            
       }
@@ -1799,8 +1832,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
 /// 'shouldApply' and 'apply' methods.
 ///
 template<typename Functor>
-static Instruction *AssociativeOpt(BinaryOperator &Root, const Functor &F,
-                                   LLVMContext* Context) {
+static Instruction *AssociativeOpt(BinaryOperator &Root, const Functor &F) {
   unsigned Opcode = Root.getOpcode();
   Value *LHS = Root.getOperand(0);
 
@@ -1833,7 +1865,7 @@ static Instruction *AssociativeOpt(BinaryOperator &Root, const Functor &F,
       // Make what used to be the LHS of the root be the user of the root...
       Value *ExtraOperand = TmpLHSI->getOperand(1);
       if (&Root == TmpLHSI) {
-        Root.replaceAllUsesWith(Context->getNullValue(TmpLHSI->getType()));
+        Root.replaceAllUsesWith(Constant::getNullValue(TmpLHSI->getType()));
         return 0;
       }
       Root.replaceAllUsesWith(TmpLHSI);          // Users now use TmpLHSI
@@ -1872,12 +1904,11 @@ namespace {
 // AddRHS - Implements: X + X --> X << 1
 struct AddRHS {
   Value *RHS;
-  LLVMContext* Context;
-  AddRHS(Value *rhs, LLVMContext* C) : RHS(rhs), Context(C) {}
+  explicit AddRHS(Value *rhs) : RHS(rhs) {}
   bool shouldApply(Value *LHS) const { return LHS == RHS; }
   Instruction *apply(BinaryOperator &Add) const {
     return BinaryOperator::CreateShl(Add.getOperand(0),
-                                     Context->getConstantInt(Add.getType(), 1));
+                                     ConstantInt::get(Add.getType(), 1));
   }
 };
 
@@ -1885,12 +1916,11 @@ struct AddRHS {
 //                 iff C1&C2 == 0
 struct AddMaskingAnd {
   Constant *C2;
-  LLVMContext* Context;
-  AddMaskingAnd(Constant *c, LLVMContext* C) : C2(c), Context(C) {}
+  explicit AddMaskingAnd(Constant *c) : C2(c) {}
   bool shouldApply(Value *LHS) const {
     ConstantInt *C1;
     return match(LHS, m_And(m_Value(), m_ConstantInt(C1))) &&
-           Context->getConstantExprAnd(C1, C2)->isNullValue();
+           ConstantExpr::getAnd(C1, C2)->isNullValue();
   }
   Instruction *apply(BinaryOperator &Add) const {
     return BinaryOperator::CreateOr(Add.getOperand(0), Add.getOperand(1));
@@ -1901,11 +1931,8 @@ struct AddMaskingAnd {
 
 static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO,
                                              InstCombiner *IC) {
-  LLVMContext* Context = IC->getContext();
-  
-  if (CastInst *CI = dyn_cast<CastInst>(&I)) {
-    return IC->InsertCastBefore(CI->getOpcode(), SO, I.getType(), I);
-  }
+  if (CastInst *CI = dyn_cast<CastInst>(&I))
+    return IC->Builder->CreateCast(CI->getOpcode(), SO, I.getType());
 
   // Figure out if the constant is the left or the right argument.
   bool ConstIsRHS = isa<Constant>(I.getOperand(1));
@@ -1913,24 +1940,24 @@ static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO,
 
   if (Constant *SOC = dyn_cast<Constant>(SO)) {
     if (ConstIsRHS)
-      return Context->getConstantExpr(I.getOpcode(), SOC, ConstOperand);
-    return Context->getConstantExpr(I.getOpcode(), ConstOperand, SOC);
+      return ConstantExpr::get(I.getOpcode(), SOC, ConstOperand);
+    return ConstantExpr::get(I.getOpcode(), ConstOperand, SOC);
   }
 
   Value *Op0 = SO, *Op1 = ConstOperand;
   if (!ConstIsRHS)
     std::swap(Op0, Op1);
-  Instruction *New;
+  
   if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I))
-    New = BinaryOperator::Create(BO->getOpcode(), Op0, Op1,SO->getName()+".op");
-  else if (CmpInst *CI = dyn_cast<CmpInst>(&I))
-    New = CmpInst::Create(CI->getOpcode(), CI->getPredicate(), Op0, Op1, 
-                          SO->getName()+".cmp");
-  else {
-    assert(0 && "Unknown binary instruction type!");
-    abort();
-  }
-  return IC->InsertNewInstBefore(New, I);
+    return IC->Builder->CreateBinOp(BO->getOpcode(), Op0, Op1,
+                                    SO->getName()+".op");
+  if (ICmpInst *CI = dyn_cast<ICmpInst>(&I))
+    return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1,
+                                   SO->getName()+".cmp");
+  if (FCmpInst *CI = dyn_cast<FCmpInst>(&I))
+    return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1,
+                                   SO->getName()+".cmp");
+  llvm_unreachable("Unknown binary instruction type!");
 }
 
 // FoldOpIntoSelect - Given an instruction with a select as one operand and a
@@ -1946,7 +1973,7 @@ static Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
 
   if (isa<Constant>(TV) || isa<Constant>(FV)) {
     // Bool selects with constant operands can be folded to logical ops.
-    if (SI->getType() == Type::Int1Ty) return 0;
+    if (SI->getType() == Type::getInt1Ty(*IC->getContext())) return 0;
 
     Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, IC);
     Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, IC);
@@ -1958,20 +1985,34 @@ static Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
 }
 
 
-/// FoldOpIntoPhi - Given a binary operator or cast instruction which has a PHI
-/// node as operand #0, see if we can fold the instruction into the PHI (which
-/// is only possible if all operands to the PHI are constants).
-Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
+/// FoldOpIntoPhi - Given a binary operator, cast instruction, or select which
+/// has a PHI node as operand #0, see if we can fold the instruction into the
+/// PHI (which is only possible if all operands to the PHI are constants).
+///
+/// If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms
+/// that would normally be unprofitable because they strongly encourage jump
+/// threading.
+Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I,
+                                         bool AllowAggressive) {
+  AllowAggressive = false;
   PHINode *PN = cast<PHINode>(I.getOperand(0));
   unsigned NumPHIValues = PN->getNumIncomingValues();
-  if (!PN->hasOneUse() || NumPHIValues == 0) return 0;
-
-  // Check to see if all of the operands of the PHI are constants.  If there is
-  // one non-constant value, remember the BB it is.  If there is more than one
-  // or if *it* is a PHI, bail out.
+  if (NumPHIValues == 0 ||
+      // We normally only transform phis with a single use, unless we're trying
+      // hard to make jump threading happen.
+      (!PN->hasOneUse() && !AllowAggressive))
+    return 0;
+  
+  
+  // Check to see if all of the operands of the PHI are simple constants
+  // (constantint/constantfp/undef).  If there is one non-constant value,
+  // remember the BB it is in.  If there is more than one or if *it* is a PHI,
+  // bail out.  We don't do arbitrary constant expressions here because moving
+  // their computation can be expensive without a cost model.
   BasicBlock *NonConstBB = 0;
   for (unsigned i = 0; i != NumPHIValues; ++i)
-    if (!isa<Constant>(PN->getIncomingValue(i))) {
+    if (!isa<Constant>(PN->getIncomingValue(i)) ||
+        isa<ConstantExpr>(PN->getIncomingValue(i))) {
       if (NonConstBB) return 0;  // More than one non-const value.
       if (isa<PHINode>(PN->getIncomingValue(i))) return 0;  // Itself a phi.
       NonConstBB = PN->getIncomingBlock(i);
@@ -1986,7 +2027,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
   // operation in that block.  However, if this is a critical edge, we would be
   // inserting the computation one some other paths (e.g. inside a loop).  Only
   // do this if the pred block is unconditionally branching into the phi block.
-  if (NonConstBB) {
+  if (NonConstBB != 0 && !AllowAggressive) {
     BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator());
     if (!BI || !BI->isUnconditional()) return 0;
   }
@@ -1998,15 +2039,37 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
   NewPN->takeName(PN);
 
   // Next, add all of the operands to the PHI.
-  if (I.getNumOperands() == 2) {
+  if (SelectInst *SI = dyn_cast<SelectInst>(&I)) {
+    // We only currently try to fold the condition of a select when it is a phi,
+    // not the true/false values.
+    Value *TrueV = SI->getTrueValue();
+    Value *FalseV = SI->getFalseValue();
+    BasicBlock *PhiTransBB = PN->getParent();
+    for (unsigned i = 0; i != NumPHIValues; ++i) {
+      BasicBlock *ThisBB = PN->getIncomingBlock(i);
+      Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB);
+      Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB);
+      Value *InV = 0;
+      if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
+        InV = InC->isNullValue() ? FalseVInPred : TrueVInPred;
+      } else {
+        assert(PN->getIncomingBlock(i) == NonConstBB);
+        InV = SelectInst::Create(PN->getIncomingValue(i), TrueVInPred,
+                                 FalseVInPred,
+                                 "phitmp", NonConstBB->getTerminator());
+        Worklist.Add(cast<Instruction>(InV));
+      }
+      NewPN->addIncoming(InV, ThisBB);
+    }
+  } else if (I.getNumOperands() == 2) {
     Constant *C = cast<Constant>(I.getOperand(1));
     for (unsigned i = 0; i != NumPHIValues; ++i) {
       Value *InV = 0;
       if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
         if (CmpInst *CI = dyn_cast<CmpInst>(&I))
-          InV = Context->getConstantExprCompare(CI->getPredicate(), InC, C);
+          InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C);
         else
-          InV = Context->getConstantExpr(I.getOpcode(), InC, C);
+          InV = ConstantExpr::get(I.getOpcode(), InC, C);
       } else {
         assert(PN->getIncomingBlock(i) == NonConstBB);
         if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I)) 
@@ -2014,14 +2077,14 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
                                        PN->getIncomingValue(i), C, "phitmp",
                                        NonConstBB->getTerminator());
         else if (CmpInst *CI = dyn_cast<CmpInst>(&I))
-          InV = CmpInst::Create(CI->getOpcode(), 
+          InV = CmpInst::Create(CI->getOpcode(),
                                 CI->getPredicate(),
                                 PN->getIncomingValue(i), C, "phitmp",
                                 NonConstBB->getTerminator());
         else
-          assert(0 && "Unknown binop!");
+          llvm_unreachable("Unknown binop!");
         
-        AddToWorkList(cast<Instruction>(InV));
+        Worklist.Add(cast<Instruction>(InV));
       }
       NewPN->addIncoming(InV, PN->getIncomingBlock(i));
     }
@@ -2031,13 +2094,13 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
     for (unsigned i = 0; i != NumPHIValues; ++i) {
       Value *InV;
       if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
-        InV = Context->getConstantExprCast(CI->getOpcode(), InC, RetTy);
+        InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy);
       } else {
         assert(PN->getIncomingBlock(i) == NonConstBB);
         InV = CastInst::Create(CI->getOpcode(), PN->getIncomingValue(i), 
                                I.getType(), "phitmp", 
                                NonConstBB->getTerminator());
-        AddToWorkList(cast<Instruction>(InV));
+        Worklist.Add(cast<Instruction>(InV));
       }
       NewPN->addIncoming(InV, PN->getIncomingBlock(i));
     }
@@ -2098,13 +2161,10 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
       if (SimplifyDemandedInstructionBits(I))
         return &I;
 
-      // zext(i1) - 1  ->  select i1, 0, -1
+      // zext(bool) + C -> bool ? C + 1 : C
       if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS))
-        if (CI->isAllOnesValue() &&
-            ZI->getOperand(0)->getType() == Type::Int1Ty)
-          return SelectInst::Create(ZI->getOperand(0),
-                                    Context->getNullValue(I.getType()),
-                              Context->getConstantIntAllOnesValue(I.getType()));
+        if (ZI->getSrcTy() == Type::getInt1Ty(*Context))
+          return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI);
     }
 
     if (isa<PHINode>(LHS))
@@ -2146,24 +2206,23 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
       const Type *MiddleType = 0;
       switch (Size) {
         default: break;
-        case 32: MiddleType = Type::Int32Ty; break;
-        case 16: MiddleType = Type::Int16Ty; break;
-        case  8: MiddleType = Type::Int8Ty; break;
+        case 32: MiddleType = Type::getInt32Ty(*Context); break;
+        case 16: MiddleType = Type::getInt16Ty(*Context); break;
+        case  8: MiddleType = Type::getInt8Ty(*Context); break;
       }
       if (MiddleType) {
-        Instruction *NewTrunc = new TruncInst(XorLHS, MiddleType, "sext");
-        InsertNewInstBefore(NewTrunc, I);
+        Value *NewTrunc = Builder->CreateTrunc(XorLHS, MiddleType, "sext");
         return new SExtInst(NewTrunc, I.getType(), I.getName());
       }
     }
   }
 
-  if (I.getType() == Type::Int1Ty)
+  if (I.getType() == Type::getInt1Ty(*Context))
     return BinaryOperator::CreateXor(LHS, RHS);
 
   // X + X --> X << 1
   if (I.getType()->isInteger()) {
-    if (Instruction *Result = AssociativeOpt(I, AddRHS(RHS, Context), Context))
+    if (Instruction *Result = AssociativeOpt(I, AddRHS(RHS)))
       return Result;
 
     if (Instruction *RHSI = dyn_cast<Instruction>(RHS)) {
@@ -2180,11 +2239,10 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
 
   // -A + B  -->  B - A
   // -A + -B  -->  -(A + B)
-  if (Value *LHSV = dyn_castNegVal(LHS, Context)) {
+  if (Value *LHSV = dyn_castNegVal(LHS)) {
     if (LHS->getType()->isIntOrIntVector()) {
-      if (Value *RHSV = dyn_castNegVal(RHS, Context)) {
-        Instruction *NewAdd = BinaryOperator::CreateAdd(LHSV, RHSV, "sum");
-        InsertNewInstBefore(NewAdd, I);
+      if (Value *RHSV = dyn_castNegVal(RHS)) {
+        Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum");
         return BinaryOperator::CreateNeg(NewAdd);
       }
     }
@@ -2194,34 +2252,34 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
 
   // A + -B  -->  A - B
   if (!isa<Constant>(RHS))
-    if (Value *V = dyn_castNegVal(RHS, Context))
+    if (Value *V = dyn_castNegVal(RHS))
       return BinaryOperator::CreateSub(LHS, V);
 
 
   ConstantInt *C2;
-  if (Value *X = dyn_castFoldableMul(LHS, C2, Context)) {
+  if (Value *X = dyn_castFoldableMul(LHS, C2)) {
     if (X == RHS)   // X*C + X --> X * (C+1)
-      return BinaryOperator::CreateMul(RHS, AddOne(C2, Context));
+      return BinaryOperator::CreateMul(RHS, AddOne(C2));
 
     // X*C1 + X*C2 --> X * (C1+C2)
     ConstantInt *C1;
-    if (X == dyn_castFoldableMul(RHS, C1, Context))
-      return BinaryOperator::CreateMul(X, Context->getConstantExprAdd(C1, C2));
+    if (X == dyn_castFoldableMul(RHS, C1))
+      return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2));
   }
 
   // X + X*C --> X * (C+1)
-  if (dyn_castFoldableMul(RHS, C2, Context) == LHS)
-    return BinaryOperator::CreateMul(LHS, AddOne(C2, Context));
+  if (dyn_castFoldableMul(RHS, C2) == LHS)
+    return BinaryOperator::CreateMul(LHS, AddOne(C2));
 
   // X + ~X --> -1   since   ~X = -X-1
-  if (dyn_castNotVal(LHS, Context) == RHS ||
-      dyn_castNotVal(RHS, Context) == LHS)
-    return ReplaceInstUsesWith(I, Context->getAllOnesValue(I.getType()));
+  if (dyn_castNotVal(LHS) == RHS ||
+      dyn_castNotVal(RHS) == LHS)
+    return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
   
 
   // (A & C1)+(B & C2) --> (A & C1)|(B & C2) iff C1&C2 == 0
   if (match(RHS, m_And(m_Value(), m_ConstantInt(C2))))
-    if (Instruction *R = AssociativeOpt(I, AddMaskingAnd(C2, Context), Context))
+    if (Instruction *R = AssociativeOpt(I, AddMaskingAnd(C2)))
       return R;
   
   // A+B --> A|B iff A and B have no bits set in common.
@@ -2258,8 +2316,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
       }
 
       if (W == Y) {
-        Value *NewAdd = InsertNewInstBefore(BinaryOperator::CreateAdd(X, Z,
-                                                            LHS->getName()), I);
+        Value *NewAdd = Builder->CreateAdd(X, Z, LHS->getName());
         return BinaryOperator::CreateMul(W, NewAdd);
       }
     }
@@ -2268,11 +2325,12 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
   if (ConstantInt *CRHS = dyn_cast<ConstantInt>(RHS)) {
     Value *X = 0;
     if (match(LHS, m_Not(m_Value(X))))    // ~X + C --> (C-1) - X
-      return BinaryOperator::CreateSub(SubOne(CRHS, Context), X);
+      return BinaryOperator::CreateSub(SubOne(CRHS), X);
 
     // (X & FF00) + xx00  -> (X+xx00) & FF00
-    if (LHS->hasOneUse() && match(LHS, m_And(m_Value(X), m_ConstantInt(C2)))) {
-      Constant *Anded = Context->getConstantExprAnd(CRHS, C2);
+    if (LHS->hasOneUse() &&
+        match(LHS, m_And(m_Value(X), m_ConstantInt(C2)))) {
+      Constant *Anded = ConstantExpr::getAnd(CRHS, C2);
       if (Anded == CRHS) {
         // See if all bits from the first bit set in the Add RHS up are included
         // in the mask.  First, get the rightmost bit.
@@ -2286,8 +2344,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
 
         if (AddRHSHighBits == AddRHSHighBitsAnd) {
           // Okay, the xform is safe.  Insert the new add pronto.
-          Value *NewAdd = InsertNewInstBefore(BinaryOperator::CreateAdd(X, CRHS,
-                                                            LHS->getName()), I);
+          Value *NewAdd = Builder->CreateAdd(X, CRHS, LHS->getName());
           return BinaryOperator::CreateAnd(NewAdd, C2);
         }
       }
@@ -2299,28 +2356,6 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
         return R;
   }
 
-  // add (cast *A to intptrtype) B -> 
-  //   cast (GEP (cast *A to i8*) B)  -->  intptrtype
-  {
-    CastInst *CI = dyn_cast<CastInst>(LHS);
-    Value *Other = RHS;
-    if (!CI) {
-      CI = dyn_cast<CastInst>(RHS);
-      Other = LHS;
-    }
-    if (CI && CI->getType()->isSized() && 
-        (CI->getType()->getScalarSizeInBits() ==
-         TD->getIntPtrType()->getPrimitiveSizeInBits()) 
-        && isa<PointerType>(CI->getOperand(0)->getType())) {
-      unsigned AS =
-        cast<PointerType>(CI->getOperand(0)->getType())->getAddressSpace();
-      Value *I2 = InsertBitCastBefore(CI->getOperand(0),
-                                  Context->getPointerType(Type::Int8Ty, AS), I);
-      I2 = InsertNewInstBefore(GetElementPtrInst::Create(I2, Other, "ctg2"), I);
-      return new PtrToIntInst(I2, CI->getType());
-    }
-  }
-  
   // add (select X 0 (sub n A)) A  -->  select X A n
   {
     SelectInst *SI = dyn_cast<SelectInst>(LHS);
@@ -2336,10 +2371,12 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
 
       // Can we fold the add into the argument of the select?
       // We check both true and false select arguments for a matching subtract.
-      if (match(FV, m_Zero()) && match(TV, m_Sub(m_Value(N), m_Specific(A))))
+      if (match(FV, m_Zero()) &&
+          match(TV, m_Sub(m_Value(N), m_Specific(A))))
         // Fold the add into the true select value.
         return SelectInst::Create(SI->getCondition(), N, A);
-      if (match(TV, m_Zero()) && match(FV, m_Sub(m_Value(N), m_Specific(A))))
+      if (match(TV, m_Zero()) &&
+          match(FV, m_Sub(m_Value(N), m_Specific(A))))
         // Fold the add into the false select value.
         return SelectInst::Create(SI->getCondition(), A, N);
     }
@@ -2351,14 +2388,13 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
     // (add (sext x), cst) --> (sext (add x, cst'))
     if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) {
       Constant *CI = 
-        Context->getConstantExprTrunc(RHSC, LHSConv->getOperand(0)->getType());
+        ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
       if (LHSConv->hasOneUse() &&
-          Context->getConstantExprSExt(CI, I.getType()) == RHSC &&
+          ConstantExpr::getSExt(CI, I.getType()) == RHSC &&
           WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
         // Insert the new, smaller add.
-        Instruction *NewAdd = BinaryOperator::CreateAdd(LHSConv->getOperand(0), 
-                                                        CI, "addconv");
-        InsertNewInstBefore(NewAdd, I);
+        Value *NewAdd = Builder->CreateAdd(LHSConv->getOperand(0), 
+                                           CI, "addconv");
         return new SExtInst(NewAdd, I.getType());
       }
     }
@@ -2373,10 +2409,8 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
           WillNotOverflowSignedAdd(LHSConv->getOperand(0),
                                    RHSConv->getOperand(0))) {
         // Insert the new integer add.
-        Instruction *NewAdd = BinaryOperator::CreateAdd(LHSConv->getOperand(0), 
-                                                        RHSConv->getOperand(0),
-                                                        "addconv");
-        InsertNewInstBefore(NewAdd, I);
+        Value *NewAdd = Builder->CreateAdd(LHSConv->getOperand(0), 
+                                           RHSConv->getOperand(0), "addconv");
         return new SExtInst(NewAdd, I.getType());
       }
     }
@@ -2392,7 +2426,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
   if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
     // X + 0 --> X
     if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
-      if (CFP->isExactlyValue(Context->getConstantFPNegativeZero
+      if (CFP->isExactlyValue(ConstantFP::getNegativeZero
                               (I.getType())->getValueAPF()))
         return ReplaceInstUsesWith(I, LHS);
     }
@@ -2404,12 +2438,12 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
 
   // -A + B  -->  B - A
   // -A + -B  -->  -(A + B)
-  if (Value *LHSV = dyn_castFNegVal(LHS, Context))
+  if (Value *LHSV = dyn_castFNegVal(LHS))
     return BinaryOperator::CreateFSub(RHS, LHSV);
 
   // A + -B  -->  A - B
   if (!isa<Constant>(RHS))
-    if (Value *V = dyn_castFNegVal(RHS, Context))
+    if (Value *V = dyn_castFNegVal(RHS))
       return BinaryOperator::CreateFSub(LHS, V);
 
   // Check for X+0.0.  Simplify it to X if we know X is not -0.0.
@@ -2427,14 +2461,13 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
     // instcombined.
     if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) {
       Constant *CI = 
-      Context->getConstantExprFPToSI(CFP, LHSConv->getOperand(0)->getType());
+      ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType());
       if (LHSConv->hasOneUse() &&
-          Context->getConstantExprSIToFP(CI, I.getType()) == CFP &&
+          ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
           WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
         // Insert the new integer add.
-        Instruction *NewAdd = BinaryOperator::CreateAdd(LHSConv->getOperand(0), 
-                                                        CI, "addconv");
-        InsertNewInstBefore(NewAdd, I);
+        Value *NewAdd = Builder->CreateAdd(LHSConv->getOperand(0),
+                                           CI, "addconv");
         return new SIToFPInst(NewAdd, I.getType());
       }
     }
@@ -2449,10 +2482,8 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
           WillNotOverflowSignedAdd(LHSConv->getOperand(0),
                                    RHSConv->getOperand(0))) {
         // Insert the new integer add.
-        Instruction *NewAdd = BinaryOperator::CreateAdd(LHSConv->getOperand(0), 
-                                                        RHSConv->getOperand(0),
-                                                        "addconv");
-        InsertNewInstBefore(NewAdd, I);
+        Value *NewAdd = Builder->CreateAdd(LHSConv->getOperand(0), 
+                                           RHSConv->getOperand(0), "addconv");
         return new SIToFPInst(NewAdd, I.getType());
       }
     }
@@ -2465,10 +2496,10 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
   if (Op0 == Op1)                        // sub X, X  -> 0
-    return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
+    return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
 
   // If this is a 'B = x-(-A)', change to B = x+A...
-  if (Value *V = dyn_castNegVal(Op1, Context))
+  if (Value *V = dyn_castNegVal(Op1))
     return BinaryOperator::CreateAdd(Op0, V);
 
   if (isa<UndefValue>(Op0))
@@ -2484,7 +2515,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
     // C - ~X == X + (1+C)
     Value *X = 0;
     if (match(Op1, m_Not(m_Value(X))))
-      return BinaryOperator::CreateAdd(X, AddOne(C, Context));
+      return BinaryOperator::CreateAdd(X, AddOne(C));
 
     // -(X >>u 31) -> (X >>s 31)
     // -(X >>s 31) -> (X >>u 31)
@@ -2519,22 +2550,29 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
     if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
       if (Instruction *R = FoldOpIntoSelect(I, SI, this))
         return R;
+
+    // C - zext(bool) -> bool ? C - 1 : C
+    if (ZExtInst *ZI = dyn_cast<ZExtInst>(Op1))
+      if (ZI->getSrcTy() == Type::getInt1Ty(*Context))
+        return SelectInst::Create(ZI->getOperand(0), SubOne(C), C);
   }
 
-  if (I.getType() == Type::Int1Ty)
+  if (I.getType() == Type::getInt1Ty(*Context))
     return BinaryOperator::CreateXor(Op0, Op1);
 
   if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) {
     if (Op1I->getOpcode() == Instruction::Add) {
       if (Op1I->getOperand(0) == Op0)              // X-(X+Y) == -Y
-        return BinaryOperator::CreateNeg(Op1I->getOperand(1), I.getName());
+        return BinaryOperator::CreateNeg(Op1I->getOperand(1),
+                                         I.getName());
       else if (Op1I->getOperand(1) == Op0)         // X-(Y+X) == -Y
-        return BinaryOperator::CreateNeg(Op1I->getOperand(0), I.getName());
+        return BinaryOperator::CreateNeg(Op1I->getOperand(0),
+                                         I.getName());
       else if (ConstantInt *CI1 = dyn_cast<ConstantInt>(I.getOperand(0))) {
         if (ConstantInt *CI2 = dyn_cast<ConstantInt>(Op1I->getOperand(1)))
           // C1-(X+C2) --> (C1-C2)-X
           return BinaryOperator::CreateSub(
-            Context->getConstantExprSub(CI1, CI2), Op1I->getOperand(0));
+            ConstantExpr::getSub(CI1, CI2), Op1I->getOperand(0));
       }
     }
 
@@ -2558,8 +2596,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
           (Op1I->getOperand(0) == Op0 || Op1I->getOperand(1) == Op0)) {
         Value *OtherOp = Op1I->getOperand(Op1I->getOperand(0) == Op0);
 
-        Value *NewNot =
-          InsertNewInstBefore(BinaryOperator::CreateNot(OtherOp, "B.not"), I);
+        Value *NewNot = Builder->CreateNot(OtherOp, "B.not");
         return BinaryOperator::CreateAnd(Op0, NewNot);
       }
 
@@ -2569,13 +2606,13 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
           if (CSI->isZero())
             if (Constant *DivRHS = dyn_cast<Constant>(Op1I->getOperand(1)))
               return BinaryOperator::CreateSDiv(Op1I->getOperand(0),
-                                          Context->getConstantExprNeg(DivRHS));
+                                          ConstantExpr::getNeg(DivRHS));
 
       // X - X*C --> X * (1-C)
       ConstantInt *C2 = 0;
-      if (dyn_castFoldableMul(Op1I, C2, Context) == Op0) {
+      if (dyn_castFoldableMul(Op1I, C2) == Op0) {
         Constant *CP1 = 
-          Context->getConstantExprSub(Context->getConstantInt(I.getType(), 1),
+          ConstantExpr::getSub(ConstantInt::get(I.getType(), 1),
                                              C2);
         return BinaryOperator::CreateMul(Op0, CP1);
       }
@@ -2590,18 +2627,19 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
         return ReplaceInstUsesWith(I, Op0I->getOperand(0));
     } else if (Op0I->getOpcode() == Instruction::Sub) {
       if (Op0I->getOperand(0) == Op1)             // (X-Y)-X == -Y
-        return BinaryOperator::CreateNeg(Op0I->getOperand(1), I.getName());
+        return BinaryOperator::CreateNeg(Op0I->getOperand(1),
+                                         I.getName());
     }
   }
 
   ConstantInt *C1;
-  if (Value *X = dyn_castFoldableMul(Op0, C1, Context)) {
+  if (Value *X = dyn_castFoldableMul(Op0, C1)) {
     if (X == Op1)  // X*C - X --> X * (C-1)
-      return BinaryOperator::CreateMul(Op1, SubOne(C1, Context));
+      return BinaryOperator::CreateMul(Op1, SubOne(C1));
 
     ConstantInt *C2;   // X*C1 - X*C2 -> X * (C1-C2)
-    if (X == dyn_castFoldableMul(Op1, C2, Context))
-      return BinaryOperator::CreateMul(X, Context->getConstantExprSub(C1, C2));
+    if (X == dyn_castFoldableMul(Op1, C2))
+      return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2));
   }
   return 0;
 }
@@ -2610,15 +2648,17 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
   // If this is a 'B = x-(-A)', change to B = x+A...
-  if (Value *V = dyn_castFNegVal(Op1, Context))
+  if (Value *V = dyn_castFNegVal(Op1))
     return BinaryOperator::CreateFAdd(Op0, V);
 
   if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) {
     if (Op1I->getOpcode() == Instruction::FAdd) {
       if (Op1I->getOperand(0) == Op0)              // X-(X+Y) == -Y
-        return BinaryOperator::CreateFNeg(Op1I->getOperand(1), I.getName());
+        return BinaryOperator::CreateFNeg(Op1I->getOperand(1),
+                                          I.getName());
       else if (Op1I->getOperand(1) == Op0)         // X-(Y+X) == -Y
-        return BinaryOperator::CreateFNeg(Op1I->getOperand(0), I.getName());
+        return BinaryOperator::CreateFNeg(Op1I->getOperand(0),
+                                          I.getName());
     }
   }
 
@@ -2657,26 +2697,24 @@ static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS,
 
 Instruction *InstCombiner::visitMul(BinaryOperator &I) {
   bool Changed = SimplifyCommutative(I);
-  Value *Op0 = I.getOperand(0);
+  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
-  // TODO: If Op1 is undef and Op0 is finite, return zero.
-  if (!I.getType()->isFPOrFPVector() &&
-      isa<UndefValue>(I.getOperand(1)))              // undef * X -> 0
-    return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
+  if (isa<UndefValue>(Op1))              // undef * X -> 0
+    return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
 
-  // Simplify mul instructions with a constant RHS...
-  if (Constant *Op1 = dyn_cast<Constant>(I.getOperand(1))) {
-    if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+  // Simplify mul instructions with a constant RHS.
+  if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1C)) {
 
       // ((X << C1)*C2) == (X * (C2 << C1))
       if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op0))
         if (SI->getOpcode() == Instruction::Shl)
           if (Constant *ShOp = dyn_cast<Constant>(SI->getOperand(1)))
             return BinaryOperator::CreateMul(SI->getOperand(0),
-                                        Context->getConstantExprShl(CI, ShOp));
+                                        ConstantExpr::getShl(CI, ShOp));
 
       if (CI->isZero())
-        return ReplaceInstUsesWith(I, Op1);  // X * 0  == 0
+        return ReplaceInstUsesWith(I, Op1C);  // X * 0  == 0
       if (CI->equalsInt(1))                  // X * 1  == X
         return ReplaceInstUsesWith(I, Op0);
       if (CI->isAllOnesValue())              // X * -1 == 0 - X
@@ -2685,12 +2723,13 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
       const APInt& Val = cast<ConstantInt>(CI)->getValue();
       if (Val.isPowerOf2()) {          // Replace X*(2^C) with X << C
         return BinaryOperator::CreateShl(Op0,
-                 Context->getConstantInt(Op0->getType(), Val.logBase2()));
+                 ConstantInt::get(Op0->getType(), Val.logBase2()));
       }
-    } else if (isa<VectorType>(Op1->getType())) {
-      // TODO: If Op1 is all zeros and Op0 is all finite, return all zeros.
+    } else if (isa<VectorType>(Op1C->getType())) {
+      if (Op1C->isNullValue())
+        return ReplaceInstUsesWith(I, Op1C);
 
-      if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1)) {
+      if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1C)) {
         if (Op1V->isAllOnesValue())              // X * -1 == 0 - X
           return BinaryOperator::CreateNeg(Op0, I.getName());
 
@@ -2705,13 +2744,10 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
     
     if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0))
       if (Op0I->getOpcode() == Instruction::Add && Op0I->hasOneUse() &&
-          isa<ConstantInt>(Op0I->getOperand(1)) && isa<ConstantInt>(Op1)) {
+          isa<ConstantInt>(Op0I->getOperand(1)) && isa<ConstantInt>(Op1C)) {
         // Canonicalize (X+C1)*C2 -> X*C2+C1*C2.
-        Instruction *Add = BinaryOperator::CreateMul(Op0I->getOperand(0),
-                                                     Op1, "tmp");
-        InsertNewInstBefore(Add, I);
-        Value *C1C2 = Context->getConstantExprMul(Op1, 
-                                           cast<Constant>(Op0I->getOperand(1)));
+        Value *Add = Builder->CreateMul(Op0I->getOperand(0), Op1C, "tmp");
+        Value *C1C2 = Builder->CreateMul(Op1C, Op0I->getOperand(1));
         return BinaryOperator::CreateAdd(Add, C1C2);
         
       }
@@ -2726,93 +2762,80 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
         return NV;
   }
 
-  if (Value *Op0v = dyn_castNegVal(Op0, Context))     // -X * -Y = X*Y
-    if (Value *Op1v = dyn_castNegVal(I.getOperand(1), Context))
+  if (Value *Op0v = dyn_castNegVal(Op0))     // -X * -Y = X*Y
+    if (Value *Op1v = dyn_castNegVal(Op1))
       return BinaryOperator::CreateMul(Op0v, Op1v);
 
   // (X / Y) *  Y = X - (X % Y)
   // (X / Y) * -Y = (X % Y) - X
   {
-    Value *Op1 = I.getOperand(1);
+    Value *Op1C = Op1;
     BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0);
     if (!BO ||
         (BO->getOpcode() != Instruction::UDiv && 
          BO->getOpcode() != Instruction::SDiv)) {
-      Op1 = Op0;
-      BO = dyn_cast<BinaryOperator>(I.getOperand(1));
+      Op1C = Op0;
+      BO = dyn_cast<BinaryOperator>(Op1);
     }
-    Value *Neg = dyn_castNegVal(Op1, Context);
+    Value *Neg = dyn_castNegVal(Op1C);
     if (BO && BO->hasOneUse() &&
-        (BO->getOperand(1) == Op1 || BO->getOperand(1) == Neg) &&
+        (BO->getOperand(1) == Op1C || BO->getOperand(1) == Neg) &&
         (BO->getOpcode() == Instruction::UDiv ||
          BO->getOpcode() == Instruction::SDiv)) {
       Value *Op0BO = BO->getOperand(0), *Op1BO = BO->getOperand(1);
 
-      Instruction *Rem;
+      // If the division is exact, X % Y is zero.
+      if (SDivOperator *SDiv = dyn_cast<SDivOperator>(BO))
+        if (SDiv->isExact()) {
+          if (Op1BO == Op1C)
+            return ReplaceInstUsesWith(I, Op0BO);
+          return BinaryOperator::CreateNeg(Op0BO);
+        }
+
+      Value *Rem;
       if (BO->getOpcode() == Instruction::UDiv)
-        Rem = BinaryOperator::CreateURem(Op0BO, Op1BO);
+        Rem = Builder->CreateURem(Op0BO, Op1BO);
       else
-        Rem = BinaryOperator::CreateSRem(Op0BO, Op1BO);
-
-      InsertNewInstBefore(Rem, I);
+        Rem = Builder->CreateSRem(Op0BO, Op1BO);
       Rem->takeName(BO);
 
-      if (Op1BO == Op1)
+      if (Op1BO == Op1C)
         return BinaryOperator::CreateSub(Op0BO, Rem);
-      else
-        return BinaryOperator::CreateSub(Rem, Op0BO);
+      return BinaryOperator::CreateSub(Rem, Op0BO);
     }
   }
 
-  if (I.getType() == Type::Int1Ty)
-    return BinaryOperator::CreateAnd(Op0, I.getOperand(1));
+  /// i1 mul -> i1 and.
+  if (I.getType() == Type::getInt1Ty(*Context))
+    return BinaryOperator::CreateAnd(Op0, Op1);
 
+  // X*(1 << Y) --> X << Y
+  // (1 << Y)*X --> X << Y
+  {
+    Value *Y;
+    if (match(Op0, m_Shl(m_One(), m_Value(Y))))
+      return BinaryOperator::CreateShl(Op1, Y);
+    if (match(Op1, m_Shl(m_One(), m_Value(Y))))
+      return BinaryOperator::CreateShl(Op0, Y);
+  }
+  
   // If one of the operands of the multiply is a cast from a boolean value, then
   // we know the bool is either zero or one, so this is a 'masking' multiply.
-  // See if we can simplify things based on how the boolean was originally
-  // formed.
-  CastInst *BoolCast = 0;
-  if (ZExtInst *CI = dyn_cast<ZExtInst>(Op0))
-    if (CI->getOperand(0)->getType() == Type::Int1Ty)
-      BoolCast = CI;
-  if (!BoolCast)
-    if (ZExtInst *CI = dyn_cast<ZExtInst>(I.getOperand(1)))
-      if (CI->getOperand(0)->getType() == Type::Int1Ty)
-        BoolCast = CI;
-  if (BoolCast) {
-    if (ICmpInst *SCI = dyn_cast<ICmpInst>(BoolCast->getOperand(0))) {
-      Value *SCIOp0 = SCI->getOperand(0), *SCIOp1 = SCI->getOperand(1);
-      const Type *SCOpTy = SCIOp0->getType();
-      bool TIS = false;
-      
-      // If the icmp is true iff the sign bit of X is set, then convert this
-      // multiply into a shift/and combination.
-      if (isa<ConstantInt>(SCIOp1) &&
-          isSignBitCheck(SCI->getPredicate(), cast<ConstantInt>(SCIOp1), TIS) &&
-          TIS) {
-        // Shift the X value right to turn it into "all signbits".
-        Constant *Amt = Context->getConstantInt(SCIOp0->getType(),
-                                          SCOpTy->getPrimitiveSizeInBits()-1);
-        Value *V =
-          InsertNewInstBefore(
-            BinaryOperator::Create(Instruction::AShr, SCIOp0, Amt,
-                                            BoolCast->getOperand(0)->getName()+
-                                            ".mask"), I);
-
-        // If the multiply type is not the same as the source type, sign extend
-        // or truncate to the multiply type.
-        if (I.getType() != V->getType()) {
-          uint32_t SrcBits = V->getType()->getPrimitiveSizeInBits();
-          uint32_t DstBits = I.getType()->getPrimitiveSizeInBits();
-          Instruction::CastOps opcode = 
-            (SrcBits == DstBits ? Instruction::BitCast : 
-             (SrcBits < DstBits ? Instruction::SExt : Instruction::Trunc));
-          V = InsertCastBefore(opcode, V, I.getType(), I);
-        }
+  //   X * Y (where Y is 0 or 1) -> X & (0-Y)
+  if (!isa<VectorType>(I.getType())) {
+    // -2 is "-1 << 1" so it is all bits set except the low one.
+    APInt Negative2(I.getType()->getPrimitiveSizeInBits(), (uint64_t)-2, true);
+    
+    Value *BoolCast = 0, *OtherOp = 0;
+    if (MaskedValueIsZero(Op0, Negative2))
+      BoolCast = Op0, OtherOp = Op1;
+    else if (MaskedValueIsZero(Op1, Negative2))
+      BoolCast = Op1, OtherOp = Op0;
 
-        Value *OtherOp = Op0 == BoolCast ? I.getOperand(1) : Op0;
-        return BinaryOperator::CreateAnd(V, OtherOp);
-      }
+    if (BoolCast) {
+      Value *V = Builder->CreateSub(Constant::getNullValue(I.getType()),
+                                    BoolCast, "tmp");
+      return BinaryOperator::CreateAnd(V, OtherOp);
     }
   }
 
@@ -2821,17 +2844,17 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
 
 Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
   bool Changed = SimplifyCommutative(I);
-  Value *Op0 = I.getOperand(0);
+  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
   // Simplify mul instructions with a constant RHS...
-  if (Constant *Op1 = dyn_cast<Constant>(I.getOperand(1))) {
-    if (ConstantFP *Op1F = dyn_cast<ConstantFP>(Op1)) {
+  if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
+    if (ConstantFP *Op1F = dyn_cast<ConstantFP>(Op1C)) {
       // "In IEEE floating point, x*1 is not equivalent to x for nans.  However,
       // ANSI says we can drop signals, so we can do this anyway." (from GCC)
       if (Op1F->isExactlyValue(1.0))
         return ReplaceInstUsesWith(I, Op0);  // Eliminate 'mul double %X, 1.0'
-    } else if (isa<VectorType>(Op1->getType())) {
-      if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1)) {
+    } else if (isa<VectorType>(Op1C->getType())) {
+      if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1C)) {
         // As above, vector X*splat(1.0) -> X in all defined cases.
         if (Constant *Splat = Op1V->getSplatValue()) {
           if (ConstantFP *F = dyn_cast<ConstantFP>(Splat))
@@ -2851,8 +2874,8 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
         return NV;
   }
 
-  if (Value *Op0v = dyn_castFNegVal(Op0, Context))     // -X * -Y = X*Y
-    if (Value *Op1v = dyn_castFNegVal(I.getOperand(1), Context))
+  if (Value *Op0v = dyn_castFNegVal(Op0))     // -X * -Y = X*Y
+    if (Value *Op1v = dyn_castFNegVal(Op1))
       return BinaryOperator::CreateFMul(Op0v, Op1v);
 
   return Changed ? &I : 0;
@@ -2907,11 +2930,11 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
          I != E; ++I) {
       if (*I == SI) {
         *I = SI->getOperand(NonNullOperand);
-        AddToWorkList(BBI);
+        Worklist.Add(BBI);
       } else if (*I == SelectCond) {
-        *I = NonNullOperand == 1 ? Context->getConstantIntTrue() :
-                                   Context->getConstantIntFalse();
-        AddToWorkList(BBI);
+        *I = NonNullOperand == 1 ? ConstantInt::getTrue(*Context) :
+                                   ConstantInt::getFalse(*Context);
+        Worklist.Add(BBI);
       }
     }
     
@@ -2942,7 +2965,7 @@ Instruction *InstCombiner::commonDivTransforms(BinaryOperator &I) {
   if (isa<UndefValue>(Op0)) {
     if (Op0->getType()->isFPOrFPVector())
       return ReplaceInstUsesWith(I, Op0);
-    return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
+    return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
   }
 
   // X / undef -> undef
@@ -2962,12 +2985,12 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
   // (sdiv X, X) --> 1     (udiv X, X) --> 1
   if (Op0 == Op1) {
     if (const VectorType *Ty = dyn_cast<VectorType>(I.getType())) {
-      Constant *CI = Context->getConstantInt(Ty->getElementType(), 1);
+      Constant *CI = ConstantInt::get(Ty->getElementType(), 1);
       std::vector<Constant*> Elts(Ty->getNumElements(), CI);
-      return ReplaceInstUsesWith(I, Context->getConstantVector(Elts));
+      return ReplaceInstUsesWith(I, ConstantVector::get(Elts));
     }
 
-    Constant *CI = Context->getConstantInt(I.getType(), 1);
+    Constant *CI = ConstantInt::get(I.getType(), 1);
     return ReplaceInstUsesWith(I, CI);
   }
   
@@ -2989,11 +3012,11 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
       if (Instruction::BinaryOps(LHS->getOpcode()) == I.getOpcode())
         if (ConstantInt *LHSRHS = dyn_cast<ConstantInt>(LHS->getOperand(1))) {
           if (MultiplyOverflows(RHS, LHSRHS,
-                                I.getOpcode()==Instruction::SDiv, Context))
-            return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
+                                I.getOpcode()==Instruction::SDiv))
+            return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
           else 
             return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0),
-                                      Context->getConstantExprMul(RHS, LHSRHS));
+                                      ConstantExpr::getMul(RHS, LHSRHS));
         }
 
     if (!RHS->isZero()) { // avoid X udiv 0
@@ -3009,10 +3032,10 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
   // 0 / X == 0, we don't need to preserve faults!
   if (ConstantInt *LHS = dyn_cast<ConstantInt>(Op0))
     if (LHS->equalsInt(0))
-      return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
+      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
 
   // It can't be division by zero, hence it must be division by one.
-  if (I.getType() == Type::Int1Ty)
+  if (I.getType() == Type::getInt1Ty(*Context))
     return ReplaceInstUsesWith(I, Op0);
 
   if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1)) {
@@ -3038,14 +3061,13 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
     // if so, convert to a right shift.
     if (C->getValue().isPowerOf2())  // 0 not included in isPowerOf2
       return BinaryOperator::CreateLShr(Op0, 
-            Context->getConstantInt(Op0->getType(), C->getValue().logBase2()));
+            ConstantInt::get(Op0->getType(), C->getValue().logBase2()));
 
     // X udiv C, where C >= signbit
     if (C->getValue().isNegative()) {
-      Value *IC = InsertNewInstBefore(new ICmpInst(ICmpInst::ICMP_ULT, Op0, C),
-                                      I);
-      return SelectInst::Create(IC, Context->getNullValue(I.getType()),
-                                Context->getConstantInt(I.getType(), 1));
+      Value *IC = Builder->CreateICmpULT( Op0, C);
+      return SelectInst::Create(IC, Constant::getNullValue(I.getType()),
+                                ConstantInt::get(I.getType(), 1));
     }
   }
 
@@ -3057,10 +3079,8 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
       if (C1.isPowerOf2()) {
         Value *N = RHSI->getOperand(1);
         const Type *NTy = N->getType();
-        if (uint32_t C2 = C1.logBase2()) {
-          Constant *C2V = Context->getConstantInt(NTy, C2);
-          N = InsertNewInstBefore(BinaryOperator::CreateAdd(N, C2V, "tmp"), I);
-        }
+        if (uint32_t C2 = C1.logBase2())
+          N = Builder->CreateAdd(N, ConstantInt::get(NTy, C2), "tmp");
         return BinaryOperator::CreateLShr(Op0, N);
       }
     }
@@ -3076,16 +3096,12 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
           // Compute the shift amounts
           uint32_t TSA = TVA.logBase2(), FSA = FVA.logBase2();
           // Construct the "on true" case of the select
-          Constant *TC = Context->getConstantInt(Op0->getType(), TSA);
-          Instruction *TSI = BinaryOperator::CreateLShr(
-                                                 Op0, TC, SI->getName()+".t");
-          TSI = InsertNewInstBefore(TSI, I);
+          Constant *TC = ConstantInt::get(Op0->getType(), TSA);
+          Value *TSI = Builder->CreateLShr(Op0, TC, SI->getName()+".t");
   
           // Construct the "on false" case of the select
-          Constant *FC = Context->getConstantInt(Op0->getType(), FSA); 
-          Instruction *FSI = BinaryOperator::CreateLShr(
-                                                 Op0, FC, SI->getName()+".f");
-          FSI = InsertNewInstBefore(FSI, I);
+          Constant *FC = ConstantInt::get(Op0->getType(), FSA); 
+          Value *FSI = Builder->CreateLShr(Op0, FC, SI->getName()+".f");
 
           // construct the select instruction and return it.
           return SelectInst::Create(SI->getOperand(0), TSI, FSI, SI->getName());
@@ -3105,17 +3121,45 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
     // sdiv X, -1 == -X
     if (RHS->isAllOnesValue())
       return BinaryOperator::CreateNeg(Op0);
+
+    // sdiv X, C  -->  ashr X, log2(C)
+    if (cast<SDivOperator>(&I)->isExact() &&
+        RHS->getValue().isNonNegative() &&
+        RHS->getValue().isPowerOf2()) {
+      Value *ShAmt = llvm::ConstantInt::get(RHS->getType(),
+                                            RHS->getValue().exactLogBase2());
+      return BinaryOperator::CreateAShr(Op0, ShAmt, I.getName());
+    }
+
+    // -X/C  -->  X/-C  provided the negation doesn't overflow.
+    if (SubOperator *Sub = dyn_cast<SubOperator>(Op0))
+      if (isa<Constant>(Sub->getOperand(0)) &&
+          cast<Constant>(Sub->getOperand(0))->isNullValue() &&
+          Sub->hasNoSignedWrap())
+        return BinaryOperator::CreateSDiv(Sub->getOperand(1),
+                                          ConstantExpr::getNeg(RHS));
   }
 
   // If the sign bits of both operands are zero (i.e. we can prove they are
   // unsigned inputs), turn this into a udiv.
   if (I.getType()->isInteger()) {
     APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()));
-    if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) {
-      // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set
-      return BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
+    if (MaskedValueIsZero(Op0, Mask)) {
+      if (MaskedValueIsZero(Op1, Mask)) {
+        // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set
+        return BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
+      }
+      ConstantInt *ShiftedInt;
+      if (match(Op1, m_Shl(m_ConstantInt(ShiftedInt), m_Value())) &&
+          ShiftedInt->getValue().isPowerOf2()) {
+        // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y)
+        // Safe because the only negative value (1 << Y) can take on is
+        // INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have
+        // the sign bit set.
+        return BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
+      }
     }
-  }      
+  }
   
   return 0;
 }
@@ -3134,7 +3178,7 @@ Instruction *InstCombiner::commonRemTransforms(BinaryOperator &I) {
   if (isa<UndefValue>(Op0)) {             // undef % X -> 0
     if (I.getType()->isFPOrFPVector())
       return ReplaceInstUsesWith(I, Op0);  // X % undef -> undef (could be SNaN)
-    return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
+    return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
   }
   if (isa<UndefValue>(Op1))
     return ReplaceInstUsesWith(I, Op1);  // X % undef -> undef
@@ -3159,15 +3203,15 @@ Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) {
   // 0 % X == 0 for integer, we don't need to preserve faults!
   if (Constant *LHS = dyn_cast<Constant>(Op0))
     if (LHS->isNullValue())
-      return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
+      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
 
   if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
     // X % 0 == undef, we don't need to preserve faults!
     if (RHS->equalsInt(0))
-      return ReplaceInstUsesWith(I, Context->getUndef(I.getType()));
+      return ReplaceInstUsesWith(I, UndefValue::get(I.getType()));
     
     if (RHS->equalsInt(1))  // X % 1 == 0
-      return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
+      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
 
     if (Instruction *Op0I = dyn_cast<Instruction>(Op0)) {
       if (SelectInst *SI = dyn_cast<SelectInst>(Op0I)) {
@@ -3199,7 +3243,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
     // if so, convert to a bitwise and.
     if (ConstantInt *C = dyn_cast<ConstantInt>(RHS))
       if (C->getValue().isPowerOf2())
-        return BinaryOperator::CreateAnd(Op0, SubOne(C, Context));
+        return BinaryOperator::CreateAnd(Op0, SubOne(C));
   }
 
   if (Instruction *RHSI = dyn_cast<Instruction>(I.getOperand(1))) {
@@ -3207,9 +3251,8 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
     if (RHSI->getOpcode() == Instruction::Shl &&
         isa<ConstantInt>(RHSI->getOperand(0))) {
       if (cast<ConstantInt>(RHSI->getOperand(0))->getValue().isPowerOf2()) {
-        Constant *N1 = Context->getConstantIntAllOnesValue(I.getType());
-        Value *Add = InsertNewInstBefore(BinaryOperator::CreateAdd(RHSI, N1,
-                                                                   "tmp"), I);
+        Constant *N1 = Constant::getAllOnesValue(I.getType());
+        Value *Add = Builder->CreateAdd(RHSI, N1, "tmp");
         return BinaryOperator::CreateAnd(Op0, Add);
       }
     }
@@ -3223,12 +3266,10 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
         // STO == 0 and SFO == 0 handled above.
         if ((STO->getValue().isPowerOf2()) && 
             (SFO->getValue().isPowerOf2())) {
-          Value *TrueAnd = InsertNewInstBefore(
-            BinaryOperator::CreateAnd(Op0, SubOne(STO, Context),
-                                      SI->getName()+".t"), I);
-          Value *FalseAnd = InsertNewInstBefore(
-            BinaryOperator::CreateAnd(Op0, SubOne(SFO, Context),
-                                      SI->getName()+".f"), I);
+          Value *TrueAnd = Builder->CreateAnd(Op0, SubOne(STO),
+                                              SI->getName()+".t");
+          Value *FalseAnd = Builder->CreateAnd(Op0, SubOne(SFO),
+                                               SI->getName()+".f");
           return SelectInst::Create(SI->getOperand(0), TrueAnd, FalseAnd);
         }
       }
@@ -3241,15 +3282,15 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
   // Handle the integer rem common cases
-  if (Instruction *common = commonIRemTransforms(I))
-    return common;
+  if (Instruction *Common = commonIRemTransforms(I))
+    return Common;
   
-  if (Value *RHSNeg = dyn_castNegVal(Op1, Context))
+  if (Value *RHSNeg = dyn_castNegVal(Op1))
     if (!isa<Constant>(RHSNeg) ||
         (isa<ConstantInt>(RHSNeg) &&
          cast<ConstantInt>(RHSNeg)->getValue().isStrictlyPositive())) {
       // X % -Y -> X % Y
-      AddUsesToWorkList(I);
+      Worklist.AddValue(I.getOperand(1));
       I.setOperand(1, RHSNeg);
       return &I;
     }
@@ -3279,15 +3320,15 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
       for (unsigned i = 0; i != VWidth; ++i) {
         if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i))) {
           if (RHS->getValue().isNegative())
-            Elts[i] = cast<ConstantInt>(Context->getConstantExprNeg(RHS));
+            Elts[i] = cast<ConstantInt>(ConstantExpr::getNeg(RHS));
           else
             Elts[i] = RHS;
         }
       }
 
-      Constant *NewRHSV = Context->getConstantVector(Elts);
+      Constant *NewRHSV = ConstantVector::get(Elts);
       if (NewRHSV != RHSV) {
-        AddUsesToWorkList(I);
+        Worklist.AddValue(I.getOperand(1));
         I.setOperand(1, NewRHSV);
         return &I;
       }
@@ -3351,7 +3392,7 @@ static unsigned getICmpCode(const ICmpInst *ICI) {
   case ICmpInst::ICMP_SLE: return 6;  // 110
     // True -> 7
   default:
-    assert(0 && "Invalid ICmp predicate!");
+    llvm_unreachable("Invalid ICmp predicate!");
     return 0;
   }
 }
@@ -3379,7 +3420,7 @@ static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) {
     // True -> 7
   default:
     // Not expecting FCMP_FALSE and FCMP_TRUE;
-    assert(0 && "Unexpected FCmp predicate!");
+    llvm_unreachable("Unexpected FCmp predicate!");
     return 0;
   }
 }
@@ -3389,10 +3430,10 @@ static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) {
 /// new ICmp instruction. The sign is passed in to determine which kind
 /// of predicate to use in the new icmp instruction.
 static Value *getICmpValue(bool sign, unsigned code, Value *LHS, Value *RHS,
-                           LLVMContext* Context) {
+                           LLVMContext *Context) {
   switch (code) {
-  default: assert(0 && "Illegal ICmp code!");
-  case  0: return Context->getConstantIntFalse();
+  default: llvm_unreachable("Illegal ICmp code!");
+  case  0: return ConstantInt::getFalse(*Context);
   case  1: 
     if (sign)
       return new ICmpInst(ICmpInst::ICMP_SGT, LHS, RHS);
@@ -3415,7 +3456,7 @@ static Value *getICmpValue(bool sign, unsigned code, Value *LHS, Value *RHS,
       return new ICmpInst(ICmpInst::ICMP_SLE, LHS, RHS);
     else
       return new ICmpInst(ICmpInst::ICMP_ULE, LHS, RHS);
-  case  7: return Context->getConstantIntTrue();
+  case  7: return ConstantInt::getTrue(*Context);
   }
 }
 
@@ -3423,9 +3464,9 @@ static Value *getICmpValue(bool sign, unsigned code, Value *LHS, Value *RHS,
 /// opcode and two operands into either a FCmp instruction. isordered is passed
 /// in to determine which kind of predicate to use in the new fcmp instruction.
 static Value *getFCmpValue(bool isordered, unsigned code,
-                           Value *LHS, Value *RHS, LLVMContext* Context) {
+                           Value *LHS, Value *RHS, LLVMContext *Context) {
   switch (code) {
-  default: assert(0 && "Illegal FCmp code!");
+  default: llvm_unreachable("Illegal FCmp code!");
   case  0:
     if (isordered)
       return new FCmpInst(FCmpInst::FCMP_ORD, LHS, RHS);
@@ -3461,7 +3502,7 @@ static Value *getFCmpValue(bool isordered, unsigned code,
       return new FCmpInst(FCmpInst::FCMP_OLE, LHS, RHS);
     else
       return new FCmpInst(FCmpInst::FCMP_ULE, LHS, RHS);
-  case  7: return Context->getConstantIntTrue();
+  case  7: return ConstantInt::getTrue(*Context);
   }
 }
 
@@ -3504,7 +3545,7 @@ struct FoldICmpLogical {
     case Instruction::And: Code = LHSCode & RHSCode; break;
     case Instruction::Or:  Code = LHSCode | RHSCode; break;
     case Instruction::Xor: Code = LHSCode ^ RHSCode; break;
-    default: assert(0 && "Illegal logical opcode!"); return 0;
+    default: llvm_unreachable("Illegal logical opcode!"); return 0;
     }
 
     bool isSigned = ICmpInst::isSignedPredicate(RHSICI->getPredicate()) || 
@@ -3529,14 +3570,13 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
   Value *X = Op->getOperand(0);
   Constant *Together = 0;
   if (!Op->isShift())
-    Together = Context->getConstantExprAnd(AndRHS, OpRHS);
+    Together = ConstantExpr::getAnd(AndRHS, OpRHS);
 
   switch (Op->getOpcode()) {
   case Instruction::Xor:
     if (Op->hasOneUse()) {
       // (X ^ C1) & C2 --> (X & C2) ^ (C1&C2)
-      Instruction *And = BinaryOperator::CreateAnd(X, AndRHS);
-      InsertNewInstBefore(And, TheAnd);
+      Value *And = Builder->CreateAnd(X, AndRHS);
       And->takeName(Op);
       return BinaryOperator::CreateXor(And, Together);
     }
@@ -3547,8 +3587,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
 
     if (Op->hasOneUse() && Together != OpRHS) {
       // (X | C1) & C2 --> (X | (C1&C2)) & C2
-      Instruction *Or = BinaryOperator::CreateOr(X, Together);
-      InsertNewInstBefore(Or, TheAnd);
+      Value *Or = Builder->CreateOr(X, Together);
       Or->takeName(Op);
       return BinaryOperator::CreateAnd(Or, AndRHS);
     }
@@ -3578,8 +3617,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
             return &TheAnd;
           } else {
             // Pull the XOR out of the AND.
-            Instruction *NewAnd = BinaryOperator::CreateAnd(X, AndRHS);
-            InsertNewInstBefore(NewAnd, TheAnd);
+            Value *NewAnd = Builder->CreateAnd(X, AndRHS);
             NewAnd->takeName(Op);
             return BinaryOperator::CreateXor(NewAnd, AndRHS);
           }
@@ -3595,7 +3633,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
     uint32_t BitWidth = AndRHS->getType()->getBitWidth();
     uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
     APInt ShlMask(APInt::getHighBitsSet(BitWidth, BitWidth-OpRHSVal));
-    ConstantInt *CI = Context->getConstantInt(AndRHS->getValue() & ShlMask);
+    ConstantInt *CI = ConstantInt::get(*Context, AndRHS->getValue() & ShlMask);
 
     if (CI->getValue() == ShlMask) { 
     // Masking out bits that the shift already masks
@@ -3615,7 +3653,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
     uint32_t BitWidth = AndRHS->getType()->getBitWidth();
     uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
     APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
-    ConstantInt *CI = Context->getConstantInt(AndRHS->getValue() & ShrMask);
+    ConstantInt *CI = ConstantInt::get(*Context, AndRHS->getValue() & ShrMask);
 
     if (CI->getValue() == ShrMask) {   
     // Masking out bits that the shift already masks.
@@ -3634,14 +3672,12 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
       uint32_t BitWidth = AndRHS->getType()->getBitWidth();
       uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
       APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
-      Constant *C = Context->getConstantInt(AndRHS->getValue() & ShrMask);
+      Constant *C = ConstantInt::get(*Context, AndRHS->getValue() & ShrMask);
       if (C == AndRHS) {          // Masking out bits shifted in.
         // (Val ashr C1) & C2 -> (Val lshr C1) & C2
         // Make the argument unsigned.
         Value *ShVal = Op->getOperand(0);
-        ShVal = InsertNewInstBefore(
-            BinaryOperator::CreateLShr(ShVal, OpRHS, 
-                                   Op->getName()), TheAnd);
+        ShVal = Builder->CreateLShr(ShVal, OpRHS, Op->getName());
         return BinaryOperator::CreateAnd(ShVal, AndRHS, TheAnd.getName());
       }
     }
@@ -3659,7 +3695,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
 Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
                                            bool isSigned, bool Inside, 
                                            Instruction &IB) {
-  assert(cast<ConstantInt>(Context->getConstantExprICmp((isSigned ? 
+  assert(cast<ConstantInt>(ConstantExpr::getICmp((isSigned ? 
             ICmpInst::ICMP_SLE:ICmpInst::ICMP_ULE), Lo, Hi))->getZExtValue() &&
          "Lo is not <= Hi in range emission code!");
     
@@ -3675,10 +3711,9 @@ Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
     }
 
     // Emit V-Lo <u Hi-Lo
-    Constant *NegLo = Context->getConstantExprNeg(Lo);
-    Instruction *Add = BinaryOperator::CreateAdd(V, NegLo, V->getName()+".off");
-    InsertNewInstBefore(Add, IB);
-    Constant *UpperBound = Context->getConstantExprAdd(NegLo, Hi);
+    Constant *NegLo = ConstantExpr::getNeg(Lo);
+    Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off");
+    Constant *UpperBound = ConstantExpr::getAdd(NegLo, Hi);
     return new ICmpInst(ICmpInst::ICMP_ULT, Add, UpperBound);
   }
 
@@ -3686,7 +3721,7 @@ Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
     return new ICmpInst(ICmpInst::ICMP_EQ, V, V);
 
   // V < Min || V >= Hi -> V > Hi-1
-  Hi = SubOne(cast<ConstantInt>(Hi), Context);
+  Hi = SubOne(cast<ConstantInt>(Hi));
   if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
     ICmpInst::Predicate pred = (isSigned ? 
         ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT);
@@ -3695,10 +3730,9 @@ Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
 
   // Emit V-Lo >u Hi-1-Lo
   // Note that Hi has already had one subtracted from it, above.
-  ConstantInt *NegLo = cast<ConstantInt>(Context->getConstantExprNeg(Lo));
-  Instruction *Add = BinaryOperator::CreateAdd(V, NegLo, V->getName()+".off");
-  InsertNewInstBefore(Add, IB);
-  Constant *LowerBound = Context->getConstantExprAdd(NegLo, Hi);
+  ConstantInt *NegLo = cast<ConstantInt>(ConstantExpr::getNeg(Lo));
+  Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off");
+  Constant *LowerBound = ConstantExpr::getAdd(NegLo, Hi);
   return new ICmpInst(ICmpInst::ICMP_UGT, Add, LowerBound);
 }
 
@@ -3740,7 +3774,7 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
   switch (LHSI->getOpcode()) {
   default: return 0;
   case Instruction::And:
-    if (Context->getConstantExprAnd(N, Mask) == Mask) {
+    if (ConstantExpr::getAnd(N, Mask) == Mask) {
       // If the AndRHS is a power of two minus one (0+1+), this is simple.
       if ((Mask->getValue().countLeadingZeros() + 
            Mask->getValue().countPopulation()) == 
@@ -3764,17 +3798,14 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
     // If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0
     if ((Mask->getValue().countLeadingZeros() + 
          Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth()
-        && Context->getConstantExprAnd(N, Mask)->isNullValue())
+        && ConstantExpr::getAnd(N, Mask)->isNullValue())
       break;
     return 0;
   }
   
-  Instruction *New;
   if (isSub)
-    New = BinaryOperator::CreateSub(LHSI->getOperand(0), RHS, "fold");
-  else
-    New = BinaryOperator::CreateAdd(LHSI->getOperand(0), RHS, "fold");
-  return InsertNewInstBefore(New, I);
+    return Builder->CreateSub(LHSI->getOperand(0), RHS, "fold");
+  return Builder->CreateAdd(LHSI->getOperand(0), RHS, "fold");
 }
 
 /// FoldAndOfICmps - Fold (icmp)&(icmp) if possible.
@@ -3785,16 +3816,17 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
   ICmpInst::Predicate LHSCC, RHSCC;
   
   // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2).
-  if (!match(LHS, m_ICmp(LHSCC, m_Value(Val), m_ConstantInt(LHSCst))) ||
-      !match(RHS, m_ICmp(RHSCC, m_Value(Val2), m_ConstantInt(RHSCst))))
+  if (!match(LHS, m_ICmp(LHSCC, m_Value(Val),
+                         m_ConstantInt(LHSCst))) ||
+      !match(RHS, m_ICmp(RHSCC, m_Value(Val2),
+                         m_ConstantInt(RHSCst))))
     return 0;
   
   // (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C)
   // where C is a power of 2
   if (LHSCst == RHSCst && LHSCC == RHSCC && LHSCC == ICmpInst::ICMP_ULT &&
       LHSCst->getValue().isPowerOf2()) {
-    Instruction *NewOr = BinaryOperator::CreateOr(Val, Val2);
-    InsertNewInstBefore(NewOr, I);
+    Value *NewOr = Builder->CreateOr(Val, Val2);
     return new ICmpInst(LHSCC, NewOr, LHSCst);
   }
   
@@ -3837,14 +3869,14 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
   assert(LHSCst != RHSCst && "Compares not folded above?");
 
   switch (LHSCC) {
-  default: assert(0 && "Unknown integer condition code!");
+  default: llvm_unreachable("Unknown integer condition code!");
   case ICmpInst::ICMP_EQ:
     switch (RHSCC) {
-    default: assert(0 && "Unknown integer condition code!");
+    default: llvm_unreachable("Unknown integer condition code!");
     case ICmpInst::ICMP_EQ:         // (X == 13 & X == 15) -> false
     case ICmpInst::ICMP_UGT:        // (X == 13 & X >  15) -> false
     case ICmpInst::ICMP_SGT:        // (X == 13 & X >  15) -> false
-      return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
+      return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
     case ICmpInst::ICMP_NE:         // (X == 13 & X != 15) -> X == 13
     case ICmpInst::ICMP_ULT:        // (X == 13 & X <  15) -> X == 13
     case ICmpInst::ICMP_SLT:        // (X == 13 & X <  15) -> X == 13
@@ -3852,13 +3884,13 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
     }
   case ICmpInst::ICMP_NE:
     switch (RHSCC) {
-    default: assert(0 && "Unknown integer condition code!");
+    default: llvm_unreachable("Unknown integer condition code!");
     case ICmpInst::ICMP_ULT:
-      if (LHSCst == SubOne(RHSCst, Context)) // (X != 13 & X u< 14) -> X < 13
+      if (LHSCst == SubOne(RHSCst)) // (X != 13 & X u< 14) -> X < 13
         return new ICmpInst(ICmpInst::ICMP_ULT, Val, LHSCst);
       break;                        // (X != 13 & X u< 15) -> no change
     case ICmpInst::ICMP_SLT:
-      if (LHSCst == SubOne(RHSCst, Context)) // (X != 13 & X s< 14) -> X < 13
+      if (LHSCst == SubOne(RHSCst)) // (X != 13 & X s< 14) -> X < 13
         return new ICmpInst(ICmpInst::ICMP_SLT, Val, LHSCst);
       break;                        // (X != 13 & X s< 15) -> no change
     case ICmpInst::ICMP_EQ:         // (X != 13 & X == 15) -> X == 15
@@ -3866,23 +3898,21 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
     case ICmpInst::ICMP_SGT:        // (X != 13 & X s> 15) -> X s> 15
       return ReplaceInstUsesWith(I, RHS);
     case ICmpInst::ICMP_NE:
-      if (LHSCst == SubOne(RHSCst, Context)){// (X != 13 & X != 14) -> X-13 >u 1
-        Constant *AddCST = Context->getConstantExprNeg(LHSCst);
-        Instruction *Add = BinaryOperator::CreateAdd(Val, AddCST,
-                                                     Val->getName()+".off");
-        InsertNewInstBefore(Add, I);
+      if (LHSCst == SubOne(RHSCst)){// (X != 13 & X != 14) -> X-13 >u 1
+        Constant *AddCST = ConstantExpr::getNeg(LHSCst);
+        Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
         return new ICmpInst(ICmpInst::ICMP_UGT, Add,
-                            Context->getConstantInt(Add->getType(), 1));
+                            ConstantInt::get(Add->getType(), 1));
       }
       break;                        // (X != 13 & X != 15) -> no change
     }
     break;
   case ICmpInst::ICMP_ULT:
     switch (RHSCC) {
-    default: assert(0 && "Unknown integer condition code!");
+    default: llvm_unreachable("Unknown integer condition code!");
     case ICmpInst::ICMP_EQ:         // (X u< 13 & X == 15) -> false
     case ICmpInst::ICMP_UGT:        // (X u< 13 & X u> 15) -> false
-      return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
+      return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
     case ICmpInst::ICMP_SGT:        // (X u< 13 & X s> 15) -> no change
       break;
     case ICmpInst::ICMP_NE:         // (X u< 13 & X != 15) -> X u< 13
@@ -3894,10 +3924,10 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
     break;
   case ICmpInst::ICMP_SLT:
     switch (RHSCC) {
-    default: assert(0 && "Unknown integer condition code!");
+    default: llvm_unreachable("Unknown integer condition code!");
     case ICmpInst::ICMP_EQ:         // (X s< 13 & X == 15) -> false
     case ICmpInst::ICMP_SGT:        // (X s< 13 & X s> 15) -> false
-      return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
+      return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
     case ICmpInst::ICMP_UGT:        // (X s< 13 & X u> 15) -> no change
       break;
     case ICmpInst::ICMP_NE:         // (X s< 13 & X != 15) -> X < 13
@@ -3909,18 +3939,18 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
     break;
   case ICmpInst::ICMP_UGT:
     switch (RHSCC) {
-    default: assert(0 && "Unknown integer condition code!");
+    default: llvm_unreachable("Unknown integer condition code!");
     case ICmpInst::ICMP_EQ:         // (X u> 13 & X == 15) -> X == 15
     case ICmpInst::ICMP_UGT:        // (X u> 13 & X u> 15) -> X u> 15
       return ReplaceInstUsesWith(I, RHS);
     case ICmpInst::ICMP_SGT:        // (X u> 13 & X s> 15) -> no change
       break;
     case ICmpInst::ICMP_NE:
-      if (RHSCst == AddOne(LHSCst, Context)) // (X u> 13 & X != 14) -> X u> 14
+      if (RHSCst == AddOne(LHSCst)) // (X u> 13 & X != 14) -> X u> 14
         return new ICmpInst(LHSCC, Val, RHSCst);
       break;                        // (X u> 13 & X != 15) -> no change
     case ICmpInst::ICMP_ULT:        // (X u> 13 & X u< 15) -> (X-14) <u 1
-      return InsertRangeTest(Val, AddOne(LHSCst, Context),
+      return InsertRangeTest(Val, AddOne(LHSCst),
                              RHSCst, false, true, I);
     case ICmpInst::ICMP_SLT:        // (X u> 13 & X s< 15) -> no change
       break;
@@ -3928,18 +3958,18 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
     break;
   case ICmpInst::ICMP_SGT:
     switch (RHSCC) {
-    default: assert(0 && "Unknown integer condition code!");
+    default: llvm_unreachable("Unknown integer condition code!");
     case ICmpInst::ICMP_EQ:         // (X s> 13 & X == 15) -> X == 15
     case ICmpInst::ICMP_SGT:        // (X s> 13 & X s> 15) -> X s> 15
       return ReplaceInstUsesWith(I, RHS);
     case ICmpInst::ICMP_UGT:        // (X s> 13 & X u> 15) -> no change
       break;
     case ICmpInst::ICMP_NE:
-      if (RHSCst == AddOne(LHSCst, Context)) // (X s> 13 & X != 14) -> X s> 14
+      if (RHSCst == AddOne(LHSCst)) // (X s> 13 & X != 14) -> X s> 14
         return new ICmpInst(LHSCC, Val, RHSCst);
       break;                        // (X s> 13 & X != 15) -> no change
     case ICmpInst::ICMP_SLT:        // (X s> 13 & X s< 15) -> (X-14) s< 1
-      return InsertRangeTest(Val, AddOne(LHSCst, Context),
+      return InsertRangeTest(Val, AddOne(LHSCst),
                              RHSCst, true, true, I);
     case ICmpInst::ICMP_ULT:        // (X s> 13 & X u< 15) -> no change
       break;
@@ -3950,13 +3980,89 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
   return 0;
 }
 
+Instruction *InstCombiner::FoldAndOfFCmps(Instruction &I, FCmpInst *LHS,
+                                          FCmpInst *RHS) {
+  
+  if (LHS->getPredicate() == FCmpInst::FCMP_ORD &&
+      RHS->getPredicate() == FCmpInst::FCMP_ORD) {
+    // (fcmp ord x, c) & (fcmp ord y, c)  -> (fcmp ord x, y)
+    if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
+      if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
+        // If either of the constants are nans, then the whole thing returns
+        // false.
+        if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
+          return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
+        return new FCmpInst(FCmpInst::FCMP_ORD,
+                            LHS->getOperand(0), RHS->getOperand(0));
+      }
+    
+    // Handle vector zeros.  This occurs because the canonical form of
+    // "fcmp ord x,x" is "fcmp ord x, 0".
+    if (isa<ConstantAggregateZero>(LHS->getOperand(1)) &&
+        isa<ConstantAggregateZero>(RHS->getOperand(1)))
+      return new FCmpInst(FCmpInst::FCMP_ORD,
+                          LHS->getOperand(0), RHS->getOperand(0));
+    return 0;
+  }
+  
+  Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1);
+  Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1);
+  FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate();
+  
+  
+  if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) {
+    // Swap RHS operands to match LHS.
+    Op1CC = FCmpInst::getSwappedPredicate(Op1CC);
+    std::swap(Op1LHS, Op1RHS);
+  }
+  
+  if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) {
+    // Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y).
+    if (Op0CC == Op1CC)
+      return new FCmpInst((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS);
+    
+    if (Op0CC == FCmpInst::FCMP_FALSE || Op1CC == FCmpInst::FCMP_FALSE)
+      return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
+    if (Op0CC == FCmpInst::FCMP_TRUE)
+      return ReplaceInstUsesWith(I, RHS);
+    if (Op1CC == FCmpInst::FCMP_TRUE)
+      return ReplaceInstUsesWith(I, LHS);
+    
+    bool Op0Ordered;
+    bool Op1Ordered;
+    unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered);
+    unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered);
+    if (Op1Pred == 0) {
+      std::swap(LHS, RHS);
+      std::swap(Op0Pred, Op1Pred);
+      std::swap(Op0Ordered, Op1Ordered);
+    }
+    if (Op0Pred == 0) {
+      // uno && ueq -> uno && (uno || eq) -> ueq
+      // ord && olt -> ord && (ord && lt) -> olt
+      if (Op0Ordered == Op1Ordered)
+        return ReplaceInstUsesWith(I, RHS);
+      
+      // uno && oeq -> uno && (ord && eq) -> false
+      // uno && ord -> false
+      if (!Op0Ordered)
+        return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
+      // ord && ueq -> ord && (uno || eq) -> oeq
+      return cast<Instruction>(getFCmpValue(true, Op1Pred,
+                                            Op0LHS, Op0RHS, Context));
+    }
+  }
+
+  return 0;
+}
+
 
 Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
   bool Changed = SimplifyCommutative(I);
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
   if (isa<UndefValue>(Op1))                         // X & undef -> 0
-    return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
+    return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
 
   // and X, X = X
   if (Op0 == Op1)
@@ -3976,36 +4082,32 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
   }
 
   if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) {
-    const APInt& AndRHSMask = AndRHS->getValue();
+    const APInt &AndRHSMask = AndRHS->getValue();
     APInt NotAndRHS(~AndRHSMask);
 
     // Optimize a variety of ((val OP C1) & C2) combinations...
-    if (isa<BinaryOperator>(Op0)) {
-      Instruction *Op0I = cast<Instruction>(Op0);
+    if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
       Value *Op0LHS = Op0I->getOperand(0);
       Value *Op0RHS = Op0I->getOperand(1);
       switch (Op0I->getOpcode()) {
+      default: break;
       case Instruction::Xor:
       case Instruction::Or:
         // If the mask is only needed on one incoming arm, push it up.
-        if (Op0I->hasOneUse()) {
-          if (MaskedValueIsZero(Op0LHS, NotAndRHS)) {
-            // Not masking anything out for the LHS, move to RHS.
-            Instruction *NewRHS = BinaryOperator::CreateAnd(Op0RHS, AndRHS,
-                                                   Op0RHS->getName()+".masked");
-            InsertNewInstBefore(NewRHS, I);
-            return BinaryOperator::Create(
-                       cast<BinaryOperator>(Op0I)->getOpcode(), Op0LHS, NewRHS);
-          }
-          if (!isa<Constant>(Op0RHS) &&
-              MaskedValueIsZero(Op0RHS, NotAndRHS)) {
-            // Not masking anything out for the RHS, move to LHS.
-            Instruction *NewLHS = BinaryOperator::CreateAnd(Op0LHS, AndRHS,
-                                                   Op0LHS->getName()+".masked");
-            InsertNewInstBefore(NewLHS, I);
-            return BinaryOperator::Create(
-                       cast<BinaryOperator>(Op0I)->getOpcode(), NewLHS, Op0RHS);
-          }
+        if (!Op0I->hasOneUse()) break;
+          
+        if (MaskedValueIsZero(Op0LHS, NotAndRHS)) {
+          // Not masking anything out for the LHS, move to RHS.
+          Value *NewRHS = Builder->CreateAnd(Op0RHS, AndRHS,
+                                             Op0RHS->getName()+".masked");
+          return BinaryOperator::Create(Op0I->getOpcode(), Op0LHS, NewRHS);
+        }
+        if (!isa<Constant>(Op0RHS) &&
+            MaskedValueIsZero(Op0RHS, NotAndRHS)) {
+          // Not masking anything out for the RHS, move to LHS.
+          Value *NewLHS = Builder->CreateAnd(Op0LHS, AndRHS,
+                                             Op0LHS->getName()+".masked");
+          return BinaryOperator::Create(Op0I->getOpcode(), NewLHS, Op0RHS);
         }
 
         break;
@@ -4036,8 +4138,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
           ConstantInt *A = dyn_cast<ConstantInt>(Op0LHS);
           if (!(A && A->isZero()) &&               // avoid infinite recursion.
               MaskedValueIsZero(Op0LHS, Mask)) {
-            Instruction *NewNeg = BinaryOperator::CreateNeg(Op0RHS);
-            InsertNewInstBefore(NewNeg, I);
+            Value *NewNeg = Builder->CreateNeg(Op0RHS);
             return BinaryOperator::CreateAnd(NewNeg, AndRHS);
           }
         }
@@ -4048,9 +4149,8 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
         // (1 << x) & 1 --> zext(x == 0)
         // (1 >> x) & 1 --> zext(x == 0)
         if (AndRHSMask == 1 && Op0LHS == AndRHS) {
-          Instruction *NewICmp = new ICmpInst(ICmpInst::ICMP_EQ, Op0RHS,
-                                           Context->getNullValue(I.getType()));
-          InsertNewInstBefore(NewICmp, I);
+          Value *NewICmp =
+            Builder->CreateICmpEQ(Op0RHS, Constant::getNullValue(I.getType()));
           return new ZExtInst(NewICmp, I.getType());
         }
         break;
@@ -4072,21 +4172,18 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
               // into  : and (cast X to T), trunc_or_bitcast(C1)&C2
               // This will fold the two constants together, which may allow 
               // other simplifications.
-              Instruction *NewCast = CastInst::CreateTruncOrBitCast(
+              Value *NewCast = Builder->CreateTruncOrBitCast(
                 CastOp->getOperand(0), I.getType(), 
                 CastOp->getName()+".shrunk");
-              NewCast = InsertNewInstBefore(NewCast, I);
               // trunc_or_bitcast(C1)&C2
-              Constant *C3 =
-                      Context->getConstantExprTruncOrBitCast(AndCI,I.getType());
-              C3 = Context->getConstantExprAnd(C3, AndRHS);
+              Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType());
+              C3 = ConstantExpr::getAnd(C3, AndRHS);
               return BinaryOperator::CreateAnd(NewCast, C3);
             } else if (CastOp->getOpcode() == Instruction::Or) {
               // Change: and (cast (or X, C1) to T), C2
               // into  : trunc(C1)&C2 iff trunc(C1)&C2 == C2
-              Constant *C3 =
-                      Context->getConstantExprTruncOrBitCast(AndCI,I.getType());
-              if (Context->getConstantExprAnd(C3, AndRHS) == AndRHS)
+              Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType());
+              if (ConstantExpr::getAnd(C3, AndRHS) == AndRHS)
                 // trunc(C1)&C2
                 return ReplaceInstUsesWith(I, AndRHS);
             }
@@ -4103,17 +4200,16 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
         return NV;
   }
 
-  Value *Op0NotVal = dyn_castNotVal(Op0, Context);
-  Value *Op1NotVal = dyn_castNotVal(Op1, Context);
+  Value *Op0NotVal = dyn_castNotVal(Op0);
+  Value *Op1NotVal = dyn_castNotVal(Op1);
 
   if (Op0NotVal == Op1 || Op1NotVal == Op0)  // A & ~A  == ~A & A == 0
-    return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
+    return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
 
   // (~A & ~B) == (~(A | B)) - De Morgan's Law
   if (Op0NotVal && Op1NotVal && isOnlyUse(Op0) && isOnlyUse(Op1)) {
-    Instruction *Or = BinaryOperator::CreateOr(Op0NotVal, Op1NotVal,
-                                               I.getName()+".demorgan");
-    InsertNewInstBefore(Or, I);
+    Value *Or = Builder->CreateOr(Op0NotVal, Op1NotVal,
+                                  I.getName()+".demorgan");
     return BinaryOperator::CreateNot(Or);
   }
   
@@ -4159,11 +4255,8 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
         cast<BinaryOperator>(Op1)->swapOperands();
         std::swap(A, B);
       }
-      if (A == Op0) {                                // A&(A^B) -> A & ~B
-        Instruction *NotB = BinaryOperator::CreateNot(B, "tmp");
-        InsertNewInstBefore(NotB, I);
-        return BinaryOperator::CreateAnd(A, NotB);
-      }
+      if (A == Op0)                                // A&(A^B) -> A & ~B
+        return BinaryOperator::CreateAnd(A, Builder->CreateNot(B, "tmp"));
     }
 
     // (A&((~A)|B)) -> A&B
@@ -4177,7 +4270,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
   
   if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1)) {
     // (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B)
-    if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS),Context))
+    if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS)))
       return R;
 
     if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0))
@@ -4190,16 +4283,15 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
     if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
       if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind ?
         const Type *SrcTy = Op0C->getOperand(0)->getType();
-        if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isInteger() &&
+        if (SrcTy == Op1C->getOperand(0)->getType() &&
+            SrcTy->isIntOrIntVector() &&
             // Only do this if the casts both really cause code to be generated.
             ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), 
                               I.getType(), TD) &&
             ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), 
                               I.getType(), TD)) {
-          Instruction *NewOp = BinaryOperator::CreateAnd(Op0C->getOperand(0),
-                                                         Op1C->getOperand(0),
-                                                         I.getName());
-          InsertNewInstBefore(NewOp, I);
+          Value *NewOp = Builder->CreateAnd(Op0C->getOperand(0),
+                                            Op1C->getOperand(0), I.getName());
           return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
         }
       }
@@ -4210,10 +4302,9 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
       if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() && 
           SI0->getOperand(1) == SI1->getOperand(1) &&
           (SI0->hasOneUse() || SI1->hasOneUse())) {
-        Instruction *NewOp =
-          InsertNewInstBefore(BinaryOperator::CreateAnd(SI0->getOperand(0),
-                                                        SI1->getOperand(0),
-                                                        SI0->getName()), I);
+        Value *NewOp =
+          Builder->CreateAnd(SI0->getOperand(0), SI1->getOperand(0),
+                             SI0->getName());
         return BinaryOperator::Create(SI1->getOpcode(), NewOp, 
                                       SI1->getOperand(1));
       }
@@ -4221,66 +4312,9 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
 
   // If and'ing two fcmp, try combine them into one.
   if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) {
-    if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) {
-      if (LHS->getPredicate() == FCmpInst::FCMP_ORD &&
-          RHS->getPredicate() == FCmpInst::FCMP_ORD) {
-        // (fcmp ord x, c) & (fcmp ord y, c)  -> (fcmp ord x, y)
-        if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
-          if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
-            // If either of the constants are nans, then the whole thing returns
-            // false.
-            if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
-              return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
-            return new FCmpInst(FCmpInst::FCMP_ORD, LHS->getOperand(0),
-                                RHS->getOperand(0));
-          }
-      } else {
-        Value *Op0LHS, *Op0RHS, *Op1LHS, *Op1RHS;
-        FCmpInst::Predicate Op0CC, Op1CC;
-        if (match(Op0, m_FCmp(Op0CC, m_Value(Op0LHS), m_Value(Op0RHS))) &&
-            match(Op1, m_FCmp(Op1CC, m_Value(Op1LHS), m_Value(Op1RHS)))) {
-          if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) {
-            // Swap RHS operands to match LHS.
-            Op1CC = FCmpInst::getSwappedPredicate(Op1CC);
-            std::swap(Op1LHS, Op1RHS);
-          }
-          if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) {
-            // Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y).
-            if (Op0CC == Op1CC)
-              return new FCmpInst((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS);
-            else if (Op0CC == FCmpInst::FCMP_FALSE ||
-                     Op1CC == FCmpInst::FCMP_FALSE)
-              return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
-            else if (Op0CC == FCmpInst::FCMP_TRUE)
-              return ReplaceInstUsesWith(I, Op1);
-            else if (Op1CC == FCmpInst::FCMP_TRUE)
-              return ReplaceInstUsesWith(I, Op0);
-            bool Op0Ordered;
-            bool Op1Ordered;
-            unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered);
-            unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered);
-            if (Op1Pred == 0) {
-              std::swap(Op0, Op1);
-              std::swap(Op0Pred, Op1Pred);
-              std::swap(Op0Ordered, Op1Ordered);
-            }
-            if (Op0Pred == 0) {
-              // uno && ueq -> uno && (uno || eq) -> ueq
-              // ord && olt -> ord && (ord && lt) -> olt
-              if (Op0Ordered == Op1Ordered)
-                return ReplaceInstUsesWith(I, Op1);
-              // uno && oeq -> uno && (ord && eq) -> false
-              // uno && ord -> false
-              if (!Op0Ordered)
-                return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
-              // ord && ueq -> ord && (uno || eq) -> oeq
-              return cast<Instruction>(getFCmpValue(true, Op1Pred,
-                                                    Op0LHS, Op0RHS, Context));
-            }
-          }
-        }
-      }
-    }
+    if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
+      if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS))
+        return Res;
   }
 
   return Changed ? &I : 0;
@@ -4450,7 +4484,8 @@ Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
 /// If A is (cond?-1:0) and either B or D is ~(cond?-1,0) or (cond?0,-1), then
 /// we can simplify this expression to "cond ? C : D or B".
 static Instruction *MatchSelectFromAndOr(Value *A, Value *B,
-                                         Value *C, Value *D) {
+                                         Value *C, Value *D,
+                                         LLVMContext *Context) {
   // If A is not a select of -1/0, this cannot match.
   Value *Cond = 0;
   if (!match(A, m_SelectCst<-1, 0>(m_Value(Cond))))
@@ -4477,8 +4512,10 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
   ICmpInst::Predicate LHSCC, RHSCC;
   
   // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2).
-  if (!match(LHS, m_ICmp(LHSCC, m_Value(Val), m_ConstantInt(LHSCst))) ||
-      !match(RHS, m_ICmp(RHSCC, m_Value(Val2), m_ConstantInt(RHSCst))))
+  if (!match(LHS, m_ICmp(LHSCC, m_Value(Val),
+             m_ConstantInt(LHSCst))) ||
+      !match(RHS, m_ICmp(RHSCC, m_Value(Val2),
+             m_ConstantInt(RHSCst))))
     return 0;
   
   // From here on, we only handle:
@@ -4520,18 +4557,16 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
   assert(LHSCst != RHSCst && "Compares not folded above?");
 
   switch (LHSCC) {
-  default: assert(0 && "Unknown integer condition code!");
+  default: llvm_unreachable("Unknown integer condition code!");
   case ICmpInst::ICMP_EQ:
     switch (RHSCC) {
-    default: assert(0 && "Unknown integer condition code!");
+    default: llvm_unreachable("Unknown integer condition code!");
     case ICmpInst::ICMP_EQ:
-      if (LHSCst == SubOne(RHSCst, Context)) {
+      if (LHSCst == SubOne(RHSCst)) {
         // (X == 13 | X == 14) -> X-13 <u 2
-        Constant *AddCST = Context->getConstantExprNeg(LHSCst);
-        Instruction *Add = BinaryOperator::CreateAdd(Val, AddCST,
-                                                     Val->getName()+".off");
-        InsertNewInstBefore(Add, I);
-        AddCST = Context->getConstantExprSub(AddOne(RHSCst, Context), LHSCst);
+        Constant *AddCST = ConstantExpr::getNeg(LHSCst);
+        Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
+        AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst);
         return new ICmpInst(ICmpInst::ICMP_ULT, Add, AddCST);
       }
       break;                         // (X == 13 | X == 15) -> no change
@@ -4546,7 +4581,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
     break;
   case ICmpInst::ICMP_NE:
     switch (RHSCC) {
-    default: assert(0 && "Unknown integer condition code!");
+    default: llvm_unreachable("Unknown integer condition code!");
     case ICmpInst::ICMP_EQ:          // (X != 13 | X == 15) -> X != 13
     case ICmpInst::ICMP_UGT:         // (X != 13 | X u> 15) -> X != 13
     case ICmpInst::ICMP_SGT:         // (X != 13 | X s> 15) -> X != 13
@@ -4554,12 +4589,12 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
     case ICmpInst::ICMP_NE:          // (X != 13 | X != 15) -> true
     case ICmpInst::ICMP_ULT:         // (X != 13 | X u< 15) -> true
     case ICmpInst::ICMP_SLT:         // (X != 13 | X s< 15) -> true
-      return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
+      return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
     }
     break;
   case ICmpInst::ICMP_ULT:
     switch (RHSCC) {
-    default: assert(0 && "Unknown integer condition code!");
+    default: llvm_unreachable("Unknown integer condition code!");
     case ICmpInst::ICMP_EQ:         // (X u< 13 | X == 14) -> no change
       break;
     case ICmpInst::ICMP_UGT:        // (X u< 13 | X u> 15) -> (X-13) u> 2
@@ -4567,7 +4602,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
       // this can cause overflow.
       if (RHSCst->isMaxValue(false))
         return ReplaceInstUsesWith(I, LHS);
-      return InsertRangeTest(Val, LHSCst, AddOne(RHSCst, Context),
+      return InsertRangeTest(Val, LHSCst, AddOne(RHSCst),
                              false, false, I);
     case ICmpInst::ICMP_SGT:        // (X u< 13 | X s> 15) -> no change
       break;
@@ -4580,7 +4615,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
     break;
   case ICmpInst::ICMP_SLT:
     switch (RHSCC) {
-    default: assert(0 && "Unknown integer condition code!");
+    default: llvm_unreachable("Unknown integer condition code!");
     case ICmpInst::ICMP_EQ:         // (X s< 13 | X == 14) -> no change
       break;
     case ICmpInst::ICMP_SGT:        // (X s< 13 | X s> 15) -> (X-13) s> 2
@@ -4588,7 +4623,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
       // this can cause overflow.
       if (RHSCst->isMaxValue(true))
         return ReplaceInstUsesWith(I, LHS);
-      return InsertRangeTest(Val, LHSCst, AddOne(RHSCst, Context),
+      return InsertRangeTest(Val, LHSCst, AddOne(RHSCst),
                              true, false, I);
     case ICmpInst::ICMP_UGT:        // (X s< 13 | X u> 15) -> no change
       break;
@@ -4601,7 +4636,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
     break;
   case ICmpInst::ICMP_UGT:
     switch (RHSCC) {
-    default: assert(0 && "Unknown integer condition code!");
+    default: llvm_unreachable("Unknown integer condition code!");
     case ICmpInst::ICMP_EQ:         // (X u> 13 | X == 15) -> X u> 13
     case ICmpInst::ICMP_UGT:        // (X u> 13 | X u> 15) -> X u> 13
       return ReplaceInstUsesWith(I, LHS);
@@ -4609,14 +4644,14 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
       break;
     case ICmpInst::ICMP_NE:         // (X u> 13 | X != 15) -> true
     case ICmpInst::ICMP_ULT:        // (X u> 13 | X u< 15) -> true
-      return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
+      return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
     case ICmpInst::ICMP_SLT:        // (X u> 13 | X s< 15) -> no change
       break;
     }
     break;
   case ICmpInst::ICMP_SGT:
     switch (RHSCC) {
-    default: assert(0 && "Unknown integer condition code!");
+    default: llvm_unreachable("Unknown integer condition code!");
     case ICmpInst::ICMP_EQ:         // (X s> 13 | X == 15) -> X > 13
     case ICmpInst::ICMP_SGT:        // (X s> 13 | X s> 15) -> X > 13
       return ReplaceInstUsesWith(I, LHS);
@@ -4624,7 +4659,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
       break;
     case ICmpInst::ICMP_NE:         // (X s> 13 | X != 15) -> true
     case ICmpInst::ICMP_SLT:        // (X s> 13 | X s< 15) -> true
-      return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
+      return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
     case ICmpInst::ICMP_ULT:        // (X s> 13 | X u< 15) -> no change
       break;
     }
@@ -4633,6 +4668,72 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
   return 0;
 }
 
+Instruction *InstCombiner::FoldOrOfFCmps(Instruction &I, FCmpInst *LHS,
+                                         FCmpInst *RHS) {
+  if (LHS->getPredicate() == FCmpInst::FCMP_UNO &&
+      RHS->getPredicate() == FCmpInst::FCMP_UNO && 
+      LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType()) {
+    if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
+      if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
+        // If either of the constants are nans, then the whole thing returns
+        // true.
+        if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
+          return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
+        
+        // Otherwise, no need to compare the two constants, compare the
+        // rest.
+        return new FCmpInst(FCmpInst::FCMP_UNO,
+                            LHS->getOperand(0), RHS->getOperand(0));
+      }
+    
+    // Handle vector zeros.  This occurs because the canonical form of
+    // "fcmp uno x,x" is "fcmp uno x, 0".
+    if (isa<ConstantAggregateZero>(LHS->getOperand(1)) &&
+        isa<ConstantAggregateZero>(RHS->getOperand(1)))
+      return new FCmpInst(FCmpInst::FCMP_UNO,
+                          LHS->getOperand(0), RHS->getOperand(0));
+    
+    return 0;
+  }
+  
+  Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1);
+  Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1);
+  FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate();
+  
+  if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) {
+    // Swap RHS operands to match LHS.
+    Op1CC = FCmpInst::getSwappedPredicate(Op1CC);
+    std::swap(Op1LHS, Op1RHS);
+  }
+  if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) {
+    // Simplify (fcmp cc0 x, y) | (fcmp cc1 x, y).
+    if (Op0CC == Op1CC)
+      return new FCmpInst((FCmpInst::Predicate)Op0CC,
+                          Op0LHS, Op0RHS);
+    if (Op0CC == FCmpInst::FCMP_TRUE || Op1CC == FCmpInst::FCMP_TRUE)
+      return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
+    if (Op0CC == FCmpInst::FCMP_FALSE)
+      return ReplaceInstUsesWith(I, RHS);
+    if (Op1CC == FCmpInst::FCMP_FALSE)
+      return ReplaceInstUsesWith(I, LHS);
+    bool Op0Ordered;
+    bool Op1Ordered;
+    unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered);
+    unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered);
+    if (Op0Ordered == Op1Ordered) {
+      // If both are ordered or unordered, return a new fcmp with
+      // or'ed predicates.
+      Value *RV = getFCmpValue(Op0Ordered, Op0Pred|Op1Pred,
+                               Op0LHS, Op0RHS, Context);
+      if (Instruction *I = dyn_cast<Instruction>(RV))
+        return I;
+      // Otherwise, it's a constant boolean value...
+      return ReplaceInstUsesWith(I, RV);
+    }
+  }
+  return 0;
+}
+
 /// FoldOrWithConstants - This helper function folds:
 ///
 ///     ((A | B) & C1) | (B & C2)
@@ -4655,8 +4756,7 @@ Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op,
   if (!Xor.isAllOnesValue()) return 0;
 
   if (V1 == A || V1 == B) {
-    Instruction *NewOp =
-      InsertNewInstBefore(BinaryOperator::CreateAnd((V1 == A) ? B : A, CI1), I);
+    Value *NewOp = Builder->CreateAnd((V1 == A) ? B : A, CI1);
     return BinaryOperator::CreateOr(NewOp, V1);
   }
 
@@ -4668,7 +4768,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
   if (isa<UndefValue>(Op1))                       // X | undef -> -1
-    return ReplaceInstUsesWith(I, Context->getAllOnesValue(I.getType()));
+    return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
 
   // or X, X = X
   if (Op0 == Op1)
@@ -4691,21 +4791,21 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
   if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
     ConstantInt *C1 = 0; Value *X = 0;
     // (X & C1) | C2 --> (X | C2) & (C1|C2)
-    if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) && isOnlyUse(Op0)) {
-      Instruction *Or = BinaryOperator::CreateOr(X, RHS);
-      InsertNewInstBefore(Or, I);
+    if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) &&
+        isOnlyUse(Op0)) {
+      Value *Or = Builder->CreateOr(X, RHS);
       Or->takeName(Op0);
       return BinaryOperator::CreateAnd(Or, 
-               Context->getConstantInt(RHS->getValue() | C1->getValue()));
+               ConstantInt::get(*Context, RHS->getValue() | C1->getValue()));
     }
 
     // (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2)
-    if (match(Op0, m_Xor(m_Value(X), m_ConstantInt(C1))) && isOnlyUse(Op0)) {
-      Instruction *Or = BinaryOperator::CreateOr(X, RHS);
-      InsertNewInstBefore(Or, I);
+    if (match(Op0, m_Xor(m_Value(X), m_ConstantInt(C1))) &&
+        isOnlyUse(Op0)) {
+      Value *Or = Builder->CreateOr(X, RHS);
       Or->takeName(Op0);
       return BinaryOperator::CreateXor(Or,
-                 Context->getConstantInt(C1->getValue() & ~RHS->getValue()));
+                 ConstantInt::get(*Context, C1->getValue() & ~RHS->getValue()));
     }
 
     // Try to fold constant and into select arguments.
@@ -4738,19 +4838,19 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
   }
   
   // (X^C)|Y -> (X|Y)^C iff Y&C == 0
-  if (Op0->hasOneUse() && match(Op0, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
+  if (Op0->hasOneUse() &&
+      match(Op0, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
       MaskedValueIsZero(Op1, C1->getValue())) {
-    Instruction *NOr = BinaryOperator::CreateOr(A, Op1);
-    InsertNewInstBefore(NOr, I);
+    Value *NOr = Builder->CreateOr(A, Op1);
     NOr->takeName(Op0);
     return BinaryOperator::CreateXor(NOr, C1);
   }
 
   // Y|(X^C) -> (X|Y)^C iff Y&C == 0
-  if (Op1->hasOneUse() && match(Op1, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
+  if (Op1->hasOneUse() &&
+      match(Op1, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
       MaskedValueIsZero(Op0, C1->getValue())) {
-    Instruction *NOr = BinaryOperator::CreateOr(A, Op0);
-    InsertNewInstBefore(NOr, I);
+    Value *NOr = Builder->CreateOr(A, Op0);
     NOr->takeName(Op0);
     return BinaryOperator::CreateXor(NOr, C1);
   }
@@ -4801,20 +4901,19 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
         V1 = C, V2 = A, V3 = B;
       
       if (V1) {
-        Value *Or =
-          InsertNewInstBefore(BinaryOperator::CreateOr(V2, V3, "tmp"), I);
+        Value *Or = Builder->CreateOr(V2, V3, "tmp");
         return BinaryOperator::CreateAnd(V1, Or);
       }
     }
 
     // (A & (C0?-1:0)) | (B & ~(C0?-1:0)) ->  C0 ? A : B, and commuted variants
-    if (Instruction *Match = MatchSelectFromAndOr(A, B, C, D))
+    if (Instruction *Match = MatchSelectFromAndOr(A, B, C, D, Context))
       return Match;
-    if (Instruction *Match = MatchSelectFromAndOr(B, A, D, C))
+    if (Instruction *Match = MatchSelectFromAndOr(B, A, D, C, Context))
       return Match;
-    if (Instruction *Match = MatchSelectFromAndOr(C, B, A, D))
+    if (Instruction *Match = MatchSelectFromAndOr(C, B, A, D, Context))
       return Match;
-    if (Instruction *Match = MatchSelectFromAndOr(D, A, B, C))
+    if (Instruction *Match = MatchSelectFromAndOr(D, A, B, C, Context))
       return Match;
 
     // ((A&~B)|(~A&B)) -> A^B
@@ -4841,10 +4940,8 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
       if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() && 
           SI0->getOperand(1) == SI1->getOperand(1) &&
           (SI0->hasOneUse() || SI1->hasOneUse())) {
-        Instruction *NewOp =
-        InsertNewInstBefore(BinaryOperator::CreateOr(SI0->getOperand(0),
-                                                     SI1->getOperand(0),
-                                                     SI0->getName()), I);
+        Value *NewOp = Builder->CreateOr(SI0->getOperand(0), SI1->getOperand(0),
+                                         SI0->getName());
         return BinaryOperator::Create(SI1->getOpcode(), NewOp, 
                                       SI1->getOperand(1));
       }
@@ -4865,26 +4962,25 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
 
   if (match(Op0, m_Not(m_Value(A)))) {   // ~A | Op1
     if (A == Op1)   // ~A | A == -1
-      return ReplaceInstUsesWith(I, Context->getAllOnesValue(I.getType()));
+      return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
   } else {
     A = 0;
   }
   // Note, A is still live here!
   if (match(Op1, m_Not(m_Value(B)))) {   // Op0 | ~B
     if (Op0 == B)
-      return ReplaceInstUsesWith(I, Context->getAllOnesValue(I.getType()));
+      return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
 
     // (~A | ~B) == (~(A & B)) - De Morgan's Law
     if (A && isOnlyUse(Op0) && isOnlyUse(Op1)) {
-      Value *And = InsertNewInstBefore(BinaryOperator::CreateAnd(A, B,
-                                              I.getName()+".demorgan"), I);
+      Value *And = Builder->CreateAnd(A, B, I.getName()+".demorgan");
       return BinaryOperator::CreateNot(And);
     }
   }
 
   // (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B)
   if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1))) {
-    if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS),Context))
+    if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS)))
       return R;
 
     if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
@@ -4899,17 +4995,16 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
         if (!isa<ICmpInst>(Op0C->getOperand(0)) ||
             !isa<ICmpInst>(Op1C->getOperand(0))) {
           const Type *SrcTy = Op0C->getOperand(0)->getType();
-          if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isInteger() &&
+          if (SrcTy == Op1C->getOperand(0)->getType() &&
+              SrcTy->isIntOrIntVector() &&
               // Only do this if the casts both really cause code to be
               // generated.
               ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), 
                                 I.getType(), TD) &&
               ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), 
                                 I.getType(), TD)) {
-            Instruction *NewOp = BinaryOperator::CreateOr(Op0C->getOperand(0),
-                                                          Op1C->getOperand(0),
-                                                          I.getName());
-            InsertNewInstBefore(NewOp, I);
+            Value *NewOp = Builder->CreateOr(Op0C->getOperand(0),
+                                             Op1C->getOperand(0), I.getName());
             return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
           }
         }
@@ -4919,61 +5014,9 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
     
   // (fcmp uno x, c) | (fcmp uno y, c)  -> (fcmp uno x, y)
   if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) {
-    if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) {
-      if (LHS->getPredicate() == FCmpInst::FCMP_UNO &&
-          RHS->getPredicate() == FCmpInst::FCMP_UNO && 
-          LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType()) {
-        if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
-          if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
-            // If either of the constants are nans, then the whole thing returns
-            // true.
-            if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
-              return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
-            
-            // Otherwise, no need to compare the two constants, compare the
-            // rest.
-            return new FCmpInst(FCmpInst::FCMP_UNO, LHS->getOperand(0),
-                                RHS->getOperand(0));
-          }
-      } else {
-        Value *Op0LHS, *Op0RHS, *Op1LHS, *Op1RHS;
-        FCmpInst::Predicate Op0CC, Op1CC;
-        if (match(Op0, m_FCmp(Op0CC, m_Value(Op0LHS), m_Value(Op0RHS))) &&
-            match(Op1, m_FCmp(Op1CC, m_Value(Op1LHS), m_Value(Op1RHS)))) {
-          if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) {
-            // Swap RHS operands to match LHS.
-            Op1CC = FCmpInst::getSwappedPredicate(Op1CC);
-            std::swap(Op1LHS, Op1RHS);
-          }
-          if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) {
-            // Simplify (fcmp cc0 x, y) | (fcmp cc1 x, y).
-            if (Op0CC == Op1CC)
-              return new FCmpInst((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS);
-            else if (Op0CC == FCmpInst::FCMP_TRUE ||
-                     Op1CC == FCmpInst::FCMP_TRUE)
-              return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
-            else if (Op0CC == FCmpInst::FCMP_FALSE)
-              return ReplaceInstUsesWith(I, Op1);
-            else if (Op1CC == FCmpInst::FCMP_FALSE)
-              return ReplaceInstUsesWith(I, Op0);
-            bool Op0Ordered;
-            bool Op1Ordered;
-            unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered);
-            unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered);
-            if (Op0Ordered == Op1Ordered) {
-              // If both are ordered or unordered, return a new fcmp with
-              // or'ed predicates.
-              Value *RV = getFCmpValue(Op0Ordered, Op0Pred|Op1Pred,
-                                       Op0LHS, Op0RHS, Context);
-              if (Instruction *I = dyn_cast<Instruction>(RV))
-                return I;
-              // Otherwise, it's a constant boolean value...
-              return ReplaceInstUsesWith(I, RV);
-            }
-          }
-        }
-      }
-    }
+    if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
+      if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS))
+        return Res;
   }
 
   return Changed ? &I : 0;
@@ -5001,14 +5044,14 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
     if (isa<UndefValue>(Op0))
       // Handle undef ^ undef -> 0 special case. This is a common
       // idiom (misuse).
-      return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
+      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
     return ReplaceInstUsesWith(I, Op1);  // X ^ undef -> undef
   }
 
   // xor X, X = 0, even if X is nested in a sequence of Xor's.
-  if (Instruction *Result = AssociativeOpt(I, XorSelf(Op1), Context)) {
+  if (Instruction *Result = AssociativeOpt(I, XorSelf(Op1))) {
     assert(Result == &I && "AssociativeOpt didn't work?"); Result=Result;
-    return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
+    return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
   }
   
   // See if we can simplify any instructions used by the instruction whose sole 
@@ -5020,22 +5063,20 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
       return ReplaceInstUsesWith(I, Op0);  // X ^ <0,0> -> X
 
   // Is this a ~ operation?
-  if (Value *NotOp = dyn_castNotVal(&I, Context)) {
+  if (Value *NotOp = dyn_castNotVal(&I)) {
     // ~(~X & Y) --> (X | ~Y) - De Morgan's Law
     // ~(~X | Y) === (X & ~Y) - De Morgan's Law
     if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(NotOp)) {
       if (Op0I->getOpcode() == Instruction::And || 
           Op0I->getOpcode() == Instruction::Or) {
-        if (dyn_castNotVal(Op0I->getOperand(1), Context)) Op0I->swapOperands();
-        if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0), Context)) {
-          Instruction *NotY =
-            BinaryOperator::CreateNot(Op0I->getOperand(1),
-                                      Op0I->getOperand(1)->getName()+".not");
-          InsertNewInstBefore(NotY, I);
+        if (dyn_castNotVal(Op0I->getOperand(1))) Op0I->swapOperands();
+        if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) {
+          Value *NotY =
+            Builder->CreateNot(Op0I->getOperand(1),
+                               Op0I->getOperand(1)->getName()+".not");
           if (Op0I->getOpcode() == Instruction::And)
             return BinaryOperator::CreateOr(Op0NotVal, NotY);
-          else
-            return BinaryOperator::CreateAnd(Op0NotVal, NotY);
+          return BinaryOperator::CreateAnd(Op0NotVal, NotY);
         }
       }
     }
@@ -5043,7 +5084,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
   
   
   if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
-    if (RHS == Context->getConstantIntTrue() && Op0->hasOneUse()) {
+    if (RHS->isOne() && Op0->hasOneUse()) {
       // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B
       if (ICmpInst *ICI = dyn_cast<ICmpInst>(Op0))
         return new ICmpInst(ICI->getInversePredicate(),
@@ -5059,16 +5100,12 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
       if (CmpInst *CI = dyn_cast<CmpInst>(Op0C->getOperand(0))) {
         if (CI->hasOneUse() && Op0C->hasOneUse()) {
           Instruction::CastOps Opcode = Op0C->getOpcode();
-          if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt) {
-            if (RHS == Context->getConstantExprCast(Opcode, 
-                                             Context->getConstantIntTrue(),
-                                             Op0C->getDestTy())) {
-              Instruction *NewCI = InsertNewInstBefore(CmpInst::Create(
-                                     CI->getOpcode(), CI->getInversePredicate(),
-                                     CI->getOperand(0), CI->getOperand(1)), I);
-              NewCI->takeName(CI);
-              return CastInst::Create(Opcode, NewCI, Op0C->getType());
-            }
+          if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
+              (RHS == ConstantExpr::getCast(Opcode, 
+                                            ConstantInt::getTrue(*Context),
+                                            Op0C->getDestTy()))) {
+            CI->setPredicate(CI->getInversePredicate());
+            return CastInst::Create(Opcode, CI, Op0C->getType());
           }
         }
       }
@@ -5078,9 +5115,9 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
       // ~(c-X) == X-c-1 == X+(-c-1)
       if (Op0I->getOpcode() == Instruction::Sub && RHS->isAllOnesValue())
         if (Constant *Op0I0C = dyn_cast<Constant>(Op0I->getOperand(0))) {
-          Constant *NegOp0I0C = Context->getConstantExprNeg(Op0I0C);
-          Constant *ConstantRHS = Context->getConstantExprSub(NegOp0I0C,
-                                      Context->getConstantInt(I.getType(), 1));
+          Constant *NegOp0I0C = ConstantExpr::getNeg(Op0I0C);
+          Constant *ConstantRHS = ConstantExpr::getSub(NegOp0I0C,
+                                      ConstantInt::get(I.getType(), 1));
           return BinaryOperator::CreateAdd(Op0I->getOperand(1), ConstantRHS);
         }
           
@@ -5088,28 +5125,28 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
         if (Op0I->getOpcode() == Instruction::Add) {
           // ~(X-c) --> (-c-1)-X
           if (RHS->isAllOnesValue()) {
-            Constant *NegOp0CI = Context->getConstantExprNeg(Op0CI);
+            Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI);
             return BinaryOperator::CreateSub(
-                           Context->getConstantExprSub(NegOp0CI,
-                                      Context->getConstantInt(I.getType(), 1)),
+                           ConstantExpr::getSub(NegOp0CI,
+                                      ConstantInt::get(I.getType(), 1)),
                                       Op0I->getOperand(0));
           } else if (RHS->getValue().isSignBit()) {
             // (X + C) ^ signbit -> (X + C + signbit)
-            Constant *C =
-                   Context->getConstantInt(RHS->getValue() + Op0CI->getValue());
+            Constant *C = ConstantInt::get(*Context,
+                                           RHS->getValue() + Op0CI->getValue());
             return BinaryOperator::CreateAdd(Op0I->getOperand(0), C);
 
           }
         } else if (Op0I->getOpcode() == Instruction::Or) {
           // (X|C1)^C2 -> X^(C1|C2) iff X&~C1 == 0
           if (MaskedValueIsZero(Op0I->getOperand(0), Op0CI->getValue())) {
-            Constant *NewRHS = Context->getConstantExprOr(Op0CI, RHS);
+            Constant *NewRHS = ConstantExpr::getOr(Op0CI, RHS);
             // Anything in both C1 and C2 is known to be zero, remove it from
             // NewRHS.
-            Constant *CommonBits = Context->getConstantExprAnd(Op0CI, RHS);
-            NewRHS = Context->getConstantExprAnd(NewRHS, 
-                                       Context->getConstantExprNot(CommonBits));
-            AddToWorkList(Op0I);
+            Constant *CommonBits = ConstantExpr::getAnd(Op0CI, RHS);
+            NewRHS = ConstantExpr::getAnd(NewRHS, 
+                                       ConstantExpr::getNot(CommonBits));
+            Worklist.Add(Op0I);
             I.setOperand(0, Op0I->getOperand(0));
             I.setOperand(1, NewRHS);
             return &I;
@@ -5127,13 +5164,13 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
         return NV;
   }
 
-  if (Value *X = dyn_castNotVal(Op0, Context))   // ~A ^ A == -1
+  if (Value *X = dyn_castNotVal(Op0))   // ~A ^ A == -1
     if (X == Op1)
-      return ReplaceInstUsesWith(I, Context->getAllOnesValue(I.getType()));
+      return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
 
-  if (Value *X = dyn_castNotVal(Op1, Context))   // A ^ ~A == -1
+  if (Value *X = dyn_castNotVal(Op1))   // A ^ ~A == -1
     if (X == Op0)
-      return ReplaceInstUsesWith(I, Context->getAllOnesValue(I.getType()));
+      return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
 
   
   BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1);
@@ -5152,7 +5189,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
       return ReplaceInstUsesWith(I, B);                      // A^(A^B) == B
     } else if (match(Op1I, m_Xor(m_Value(A), m_Specific(Op0)))) {
       return ReplaceInstUsesWith(I, A);                      // A^(B^A) == B
-    } else if (match(Op1I, m_And(m_Value(A), m_Value(B))) && Op1I->hasOneUse()){
+    } else if (match(Op1I, m_And(m_Value(A), m_Value(B))) && 
+               Op1I->hasOneUse()){
       if (A == Op0) {                                      // A^(A&B) -> A^(B&A)
         Op1I->swapOperands();
         std::swap(A, B);
@@ -5167,26 +5205,23 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
   BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0);
   if (Op0I) {
     Value *A, *B;
-    if (match(Op0I, m_Or(m_Value(A), m_Value(B))) && Op0I->hasOneUse()) {
+    if (match(Op0I, m_Or(m_Value(A), m_Value(B))) &&
+        Op0I->hasOneUse()) {
       if (A == Op1)                                  // (B|A)^B == (A|B)^B
         std::swap(A, B);
-      if (B == Op1) {                                // (A|B)^B == A & ~B
-        Instruction *NotB =
-          InsertNewInstBefore(BinaryOperator::CreateNot(Op1, "tmp"), I);
-        return BinaryOperator::CreateAnd(A, NotB);
-      }
+      if (B == Op1)                                  // (A|B)^B == A & ~B
+        return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1, "tmp"));
     } else if (match(Op0I, m_Xor(m_Specific(Op1), m_Value(B)))) {
       return ReplaceInstUsesWith(I, B);                      // (A^B)^A == B
     } else if (match(Op0I, m_Xor(m_Value(A), m_Specific(Op1)))) {
       return ReplaceInstUsesWith(I, A);                      // (B^A)^A == B
-    } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) && Op0I->hasOneUse()){
+    } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) && 
+               Op0I->hasOneUse()){
       if (A == Op1)                                        // (A&B)^A -> (B&A)^A
         std::swap(A, B);
       if (B == Op1 &&                                      // (B&A)^A == ~B & A
           !isa<ConstantInt>(Op1)) {  // Canonical form is (B&C)^C
-        Instruction *N =
-          InsertNewInstBefore(BinaryOperator::CreateNot(A, "tmp"), I);
-        return BinaryOperator::CreateAnd(N, Op1);
+        return BinaryOperator::CreateAnd(Builder->CreateNot(A, "tmp"), Op1);
       }
     }
   }
@@ -5196,10 +5231,9 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
       Op0I->getOpcode() == Op1I->getOpcode() && 
       Op0I->getOperand(1) == Op1I->getOperand(1) &&
       (Op1I->hasOneUse() || Op1I->hasOneUse())) {
-    Instruction *NewOp =
-      InsertNewInstBefore(BinaryOperator::CreateXor(Op0I->getOperand(0),
-                                                    Op1I->getOperand(0),
-                                                    Op0I->getName()), I);
+    Value *NewOp =
+      Builder->CreateXor(Op0I->getOperand(0), Op1I->getOperand(0),
+                         Op0I->getName());
     return BinaryOperator::Create(Op1I->getOpcode(), NewOp, 
                                   Op1I->getOperand(1));
   }
@@ -5235,8 +5269,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
         X = B, Y = A, Z = C;
       
       if (X) {
-        Instruction *NewOp =
-        InsertNewInstBefore(BinaryOperator::CreateXor(Y, Z, Op0->getName()), I);
+        Value *NewOp = Builder->CreateXor(Y, Z, Op0->getName());
         return BinaryOperator::CreateAnd(NewOp, X);
       }
     }
@@ -5244,7 +5277,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
     
   // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B)
   if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
-    if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS),Context))
+    if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS)))
       return R;
 
   // fold (xor (cast A), (cast B)) -> (cast (xor A, B))
@@ -5258,10 +5291,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
                               I.getType(), TD) &&
             ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), 
                               I.getType(), TD)) {
-          Instruction *NewOp = BinaryOperator::CreateXor(Op0C->getOperand(0),
-                                                         Op1C->getOperand(0),
-                                                         I.getName());
-          InsertNewInstBefore(NewOp, I);
+          Value *NewOp = Builder->CreateXor(Op0C->getOperand(0),
+                                            Op1C->getOperand(0), I.getName());
           return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
         }
       }
@@ -5271,8 +5302,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
 }
 
 static ConstantInt *ExtractElement(Constant *V, Constant *Idx,
-                                   LLVMContext* Context) {
-  return cast<ConstantInt>(Context->getConstantExprExtractElement(V, Idx));
+                                   LLVMContext *Context) {
+  return cast<ConstantInt>(ConstantExpr::getExtractElement(V, Idx));
 }
 
 static bool HasAddOverflow(ConstantInt *Result,
@@ -5290,13 +5321,13 @@ static bool HasAddOverflow(ConstantInt *Result,
 /// AddWithOverflow - Compute Result = In1+In2, returning true if the result
 /// overflowed for this type.
 static bool AddWithOverflow(Constant *&Result, Constant *In1,
-                            Constant *In2, LLVMContext* Context,
+                            Constant *In2, LLVMContext *Context,
                             bool IsSigned = false) {
-  Result = Context->getConstantExprAdd(In1, In2);
+  Result = ConstantExpr::getAdd(In1, In2);
 
   if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) {
     for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
-      Constant *Idx = Context->getConstantInt(Type::Int32Ty, i);
+      Constant *Idx = ConstantInt::get(Type::getInt32Ty(*Context), i);
       if (HasAddOverflow(ExtractElement(Result, Idx, Context),
                          ExtractElement(In1, Idx, Context),
                          ExtractElement(In2, Idx, Context),
@@ -5326,13 +5357,13 @@ static bool HasSubOverflow(ConstantInt *Result,
 /// SubWithOverflow - Compute Result = In1-In2, returning true if the result
 /// overflowed for this type.
 static bool SubWithOverflow(Constant *&Result, Constant *In1,
-                            Constant *In2, LLVMContext* Context,
+                            Constant *In2, LLVMContext *Context,
                             bool IsSigned = false) {
-  Result = Context->getConstantExprSub(In1, In2);
+  Result = ConstantExpr::getSub(In1, In2);
 
   if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) {
     for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
-      Constant *Idx = Context->getConstantInt(Type::Int32Ty, i);
+      Constant *Idx = ConstantInt::get(Type::getInt32Ty(*Context), i);
       if (HasSubOverflow(ExtractElement(Result, Idx, Context),
                          ExtractElement(In1, Idx, Context),
                          ExtractElement(In2, Idx, Context),
@@ -5351,11 +5382,10 @@ static bool SubWithOverflow(Constant *&Result, Constant *In1,
 /// code necessary to compute the offset from the base pointer (without adding
 /// in the base pointer).  Return the result as a signed integer of intptr size.
 static Value *EmitGEPOffset(User *GEP, Instruction &I, InstCombiner &IC) {
-  TargetData &TD = IC.getTargetData();
+  TargetData &TD = *IC.getTargetData();
   gep_type_iterator GTI = gep_type_begin(GEP);
-  const Type *IntPtrTy = TD.getIntPtrType();
-  LLVMContext* Context = IC.getContext();
-  Value *Result = Context->getNullValue(IntPtrTy);
+  const Type *IntPtrTy = TD.getIntPtrType(I.getContext());
+  Value *Result = Constant::getNullValue(IntPtrTy);
 
   // Build a mask for high order bits.
   unsigned IntPtrWidth = TD.getPointerSizeInBits();
@@ -5372,74 +5402,49 @@ static Value *EmitGEPOffset(User *GEP, Instruction &I, InstCombiner &IC) {
       if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
         Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
         
-        if (ConstantInt *RC = dyn_cast<ConstantInt>(Result))
-          Result = 
-             Context->getConstantInt(RC->getValue() + APInt(IntPtrWidth, Size));
-        else
-          Result = IC.InsertNewInstBefore(
-                   BinaryOperator::CreateAdd(Result,
-                                        Context->getConstantInt(IntPtrTy, Size),
-                                             GEP->getName()+".offs"), I);
+        Result = IC.Builder->CreateAdd(Result,
+                                       ConstantInt::get(IntPtrTy, Size),
+                                       GEP->getName()+".offs");
         continue;
       }
       
-      Constant *Scale = Context->getConstantInt(IntPtrTy, Size);
+      Constant *Scale = ConstantInt::get(IntPtrTy, Size);
       Constant *OC =
-              Context->getConstantExprIntegerCast(OpC, IntPtrTy, true /*SExt*/);
-      Scale = Context->getConstantExprMul(OC, Scale);
-      if (Constant *RC = dyn_cast<Constant>(Result))
-        Result = Context->getConstantExprAdd(RC, Scale);
-      else {
-        // Emit an add instruction.
-        Result = IC.InsertNewInstBefore(
-           BinaryOperator::CreateAdd(Result, Scale,
-                                     GEP->getName()+".offs"), I);
-      }
+              ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/);
+      Scale = ConstantExpr::getMul(OC, Scale);
+      // Emit an add instruction.
+      Result = IC.Builder->CreateAdd(Result, Scale, GEP->getName()+".offs");
       continue;
     }
     // Convert to correct type.
-    if (Op->getType() != IntPtrTy) {
-      if (Constant *OpC = dyn_cast<Constant>(Op))
-        Op = Context->getConstantExprIntegerCast(OpC, IntPtrTy, true);
-      else
-        Op = IC.InsertNewInstBefore(CastInst::CreateIntegerCast(Op, IntPtrTy,
-                                                                true,
-                                                      Op->getName()+".c"), I);
-    }
+    if (Op->getType() != IntPtrTy)
+      Op = IC.Builder->CreateIntCast(Op, IntPtrTy, true, Op->getName()+".c");
     if (Size != 1) {
-      Constant *Scale = Context->getConstantInt(IntPtrTy, Size);
-      if (Constant *OpC = dyn_cast<Constant>(Op))
-        Op = Context->getConstantExprMul(OpC, Scale);
-      else    // We'll let instcombine(mul) convert this to a shl if possible.
-        Op = IC.InsertNewInstBefore(BinaryOperator::CreateMul(Op, Scale,
-                                                  GEP->getName()+".idx"), I);
+      Constant *Scale = ConstantInt::get(IntPtrTy, Size);
+      // We'll let instcombine(mul) convert this to a shl if possible.
+      Op = IC.Builder->CreateMul(Op, Scale, GEP->getName()+".idx");
     }
 
     // Emit an add instruction.
-    if (isa<Constant>(Op) && isa<Constant>(Result))
-      Result = Context->getConstantExprAdd(cast<Constant>(Op),
-                                    cast<Constant>(Result));
-    else
-      Result = IC.InsertNewInstBefore(BinaryOperator::CreateAdd(Op, Result,
-                                                  GEP->getName()+".offs"), I);
+    Result = IC.Builder->CreateAdd(Op, Result, GEP->getName()+".offs");
   }
   return Result;
 }
 
 
-/// EvaluateGEPOffsetExpression - Return an value that can be used to compare of
-/// the *offset* implied by GEP to zero.  For example, if we have &A[i], we want
-/// to return 'i' for "icmp ne i, 0".  Note that, in general, indices can be
-/// complex, and scales are involved.  The above expression would also be legal
-/// to codegen as "icmp ne (i*4), 0" (assuming A is a pointer to i32).  This
-/// later form is less amenable to optimization though, and we are allowed to
-/// generate the first by knowing that pointer arithmetic doesn't overflow.
+/// EvaluateGEPOffsetExpression - Return a value that can be used to compare
+/// the *offset* implied by a GEP to zero.  For example, if we have &A[i], we
+/// want to return 'i' for "icmp ne i, 0".  Note that, in general, indices can
+/// be complex, and scales are involved.  The above expression would also be
+/// legal to codegen as "icmp ne (i*4), 0" (assuming A is a pointer to i32).
+/// This later form is less amenable to optimization though, and we are allowed
+/// to generate the first by knowing that pointer arithmetic doesn't overflow.
 ///
 /// If we can't emit an optimized form for this expression, this returns null.
 /// 
 static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I,
                                           InstCombiner &IC) {
-  TargetData &TD = IC.getTargetData();
+  TargetData &TD = *IC.getTargetData();
   gep_type_iterator GTI = gep_type_begin(GEP);
 
   // Check to see if this gep only has a single variable index.  If so, and if
@@ -5502,8 +5507,9 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I,
     // we don't need to bother extending: the extension won't affect where the
     // computation crosses zero.
     if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth)
-      VariableIdx = new TruncInst(VariableIdx, TD.getIntPtrType(),
-                                  VariableIdx->getNameStart(), &I);
+      VariableIdx = new TruncInst(VariableIdx, 
+                                  TD.getIntPtrType(VariableIdx->getContext()),
+                                  VariableIdx->getName(), &I);
     return VariableIdx;
   }
   
@@ -5523,40 +5529,39 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I,
     return 0;
 
   // Okay, we can do this evaluation.  Start by converting the index to intptr.
-  const Type *IntPtrTy = TD.getIntPtrType();
+  const Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext());
   if (VariableIdx->getType() != IntPtrTy)
     VariableIdx = CastInst::CreateIntegerCast(VariableIdx, IntPtrTy,
                                               true /*SExt*/, 
-                                              VariableIdx->getNameStart(), &I);
-  Constant *OffsetVal = IC.getContext()->getConstantInt(IntPtrTy, NewOffs);
+                                              VariableIdx->getName(), &I);
+  Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs);
   return BinaryOperator::CreateAdd(VariableIdx, OffsetVal, "offset", &I);
 }
 
 
 /// FoldGEPICmp - Fold comparisons between a GEP instruction and something
 /// else.  At this point we know that the GEP is on the LHS of the comparison.
-Instruction *InstCombiner::FoldGEPICmp(User *GEPLHS, Value *RHS,
+Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
                                        ICmpInst::Predicate Cond,
                                        Instruction &I) {
-  assert(dyn_castGetElementPtr(GEPLHS) && "LHS is not a getelementptr!");
-
   // Look through bitcasts.
   if (BitCastInst *BCI = dyn_cast<BitCastInst>(RHS))
     RHS = BCI->getOperand(0);
 
   Value *PtrBase = GEPLHS->getOperand(0);
-  if (PtrBase == RHS) {
+  if (TD && PtrBase == RHS && GEPLHS->isInBounds()) {
     // ((gep Ptr, OFFSET) cmp Ptr)   ---> (OFFSET cmp 0).
     // This transformation (ignoring the base and scales) is valid because we
-    // know pointers can't overflow.  See if we can output an optimized form.
+    // know pointers can't overflow since the gep is inbounds.  See if we can
+    // output an optimized form.
     Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, I, *this);
     
     // If not, synthesize the offset the hard way.
     if (Offset == 0)
       Offset = EmitGEPOffset(GEPLHS, I, *this);
     return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset,
-                        Context->getNullValue(Offset->getType()));
-  } else if (User *GEPRHS = dyn_castGetElementPtr(RHS)) {
+                        Constant::getNullValue(Offset->getType()));
+  } else if (GEPOperator *GEPRHS = dyn_cast<GEPOperator>(RHS)) {
     // If the base pointers are different, but the indices are the same, just
     // compare the base pointer.
     if (PtrBase != GEPRHS->getOperand(0)) {
@@ -5572,7 +5577,7 @@ Instruction *InstCombiner::FoldGEPICmp(User *GEPLHS, Value *RHS,
 
       // If all indices are the same, just compare the base pointers.
       if (IndicesTheSame)
-        return new ICmpInst(ICmpInst::getSignedPredicate(Cond), 
+        return new ICmpInst(ICmpInst::getSignedPredicate(Cond),
                             GEPLHS->getOperand(0), GEPRHS->getOperand(0));
 
       // Otherwise, the base pointers are different and the indices are
@@ -5622,7 +5627,7 @@ Instruction *InstCombiner::FoldGEPICmp(User *GEPLHS, Value *RHS,
 
       if (NumDifferences == 0)   // SAME GEP?
         return ReplaceInstUsesWith(I, // No comparison is needed here.
-                                   Context->getConstantInt(Type::Int1Ty,
+                                   ConstantInt::get(Type::getInt1Ty(*Context),
                                              ICmpInst::isTrueWhenEqual(Cond)));
 
       else if (NumDifferences == 1) {
@@ -5635,7 +5640,8 @@ Instruction *InstCombiner::FoldGEPICmp(User *GEPLHS, Value *RHS,
 
     // Only lower this if the icmp is the only user of the GEP or if we expect
     // the result to fold to a constant!
-    if ((isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) &&
+    if (TD &&
+        (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) &&
         (isa<ConstantExpr>(GEPRHS) || GEPRHS->hasOneUse())) {
       // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2)  --->  (OFFSET1 cmp OFFSET2)
       Value *L = EmitGEPOffset(GEPLHS, I, *this);
@@ -5680,7 +5686,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
   
   ICmpInst::Predicate Pred;
   switch (I.getPredicate()) {
-  default: assert(0 && "Unexpected predicate!");
+  default: llvm_unreachable("Unexpected predicate!");
   case FCmpInst::FCMP_UEQ:
   case FCmpInst::FCMP_OEQ:
     Pred = ICmpInst::ICMP_EQ;
@@ -5706,9 +5712,9 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
     Pred = ICmpInst::ICMP_NE;
     break;
   case FCmpInst::FCMP_ORD:
-    return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
+    return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
   case FCmpInst::FCMP_UNO:
-    return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
+    return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
   }
   
   const IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType());
@@ -5728,8 +5734,8 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
     if (SMax.compare(RHS) == APFloat::cmpLessThan) {  // smax < 13123.0
       if (Pred == ICmpInst::ICMP_NE  || Pred == ICmpInst::ICMP_SLT ||
           Pred == ICmpInst::ICMP_SLE)
-        return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
-      return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
+      return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
     }
   } else {
     // If the RHS value is > UnsignedMax, fold the comparison. This handles
@@ -5740,8 +5746,8 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
     if (UMax.compare(RHS) == APFloat::cmpLessThan) {  // umax < 13123.0
       if (Pred == ICmpInst::ICMP_NE  || Pred == ICmpInst::ICMP_ULT ||
           Pred == ICmpInst::ICMP_ULE)
-        return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
-      return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
+      return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
     }
   }
   
@@ -5753,8 +5759,8 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
     if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // smin > 12312.0
       if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT ||
           Pred == ICmpInst::ICMP_SGE)
-        return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
-      return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
+      return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
     }
   }
 
@@ -5763,27 +5769,27 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
   // casting the FP value to the integer value and back, checking for equality.
   // Don't do this for zero, because -0.0 is not fractional.
   Constant *RHSInt = LHSUnsigned
-    ? Context->getConstantExprFPToUI(RHSC, IntTy)
-    : Context->getConstantExprFPToSI(RHSC, IntTy);
+    ? ConstantExpr::getFPToUI(RHSC, IntTy)
+    : ConstantExpr::getFPToSI(RHSC, IntTy);
   if (!RHS.isZero()) {
     bool Equal = LHSUnsigned
-      ? Context->getConstantExprUIToFP(RHSInt, RHSC->getType()) == RHSC
-      : Context->getConstantExprSIToFP(RHSInt, RHSC->getType()) == RHSC;
+      ? ConstantExpr::getUIToFP(RHSInt, RHSC->getType()) == RHSC
+      : ConstantExpr::getSIToFP(RHSInt, RHSC->getType()) == RHSC;
     if (!Equal) {
       // If we had a comparison against a fractional value, we have to adjust
       // the compare predicate and sometimes the value.  RHSC is rounded towards
       // zero at this point.
       switch (Pred) {
-      default: assert(0 && "Unexpected integer comparison!");
+      default: llvm_unreachable("Unexpected integer comparison!");
       case ICmpInst::ICMP_NE:  // (float)int != 4.4   --> true
-        return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
       case ICmpInst::ICMP_EQ:  // (float)int == 4.4   --> false
-        return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
+        return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
       case ICmpInst::ICMP_ULE:
         // (float)int <= 4.4   --> int <= 4
         // (float)int <= -4.4  --> false
         if (RHS.isNegative())
-          return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
+          return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
         break;
       case ICmpInst::ICMP_SLE:
         // (float)int <= 4.4   --> int <= 4
@@ -5795,7 +5801,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
         // (float)int < -4.4   --> false
         // (float)int < 4.4    --> int <= 4
         if (RHS.isNegative())
-          return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
+          return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
         Pred = ICmpInst::ICMP_ULE;
         break;
       case ICmpInst::ICMP_SLT:
@@ -5808,7 +5814,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
         // (float)int > 4.4    --> int > 4
         // (float)int > -4.4   --> true
         if (RHS.isNegative())
-          return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
+          return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
         break;
       case ICmpInst::ICMP_SGT:
         // (float)int > 4.4    --> int > 4
@@ -5820,7 +5826,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
         // (float)int >= -4.4   --> true
         // (float)int >= 4.4    --> int > 4
         if (!RHS.isNegative())
-          return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
+          return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
         Pred = ICmpInst::ICMP_UGT;
         break;
       case ICmpInst::ICMP_SGE:
@@ -5844,22 +5850,22 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
 
   // Fold trivial predicates.
   if (I.getPredicate() == FCmpInst::FCMP_FALSE)
-    return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
+    return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 0));
   if (I.getPredicate() == FCmpInst::FCMP_TRUE)
-    return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
+    return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 1));
   
   // Simplify 'fcmp pred X, X'
   if (Op0 == Op1) {
     switch (I.getPredicate()) {
-    default: assert(0 && "Unknown predicate!");
+    default: llvm_unreachable("Unknown predicate!");
     case FCmpInst::FCMP_UEQ:    // True if unordered or equal
     case FCmpInst::FCMP_UGE:    // True if unordered, greater than, or equal
     case FCmpInst::FCMP_ULE:    // True if unordered, less than, or equal
-      return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
+      return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 1));
     case FCmpInst::FCMP_OGT:    // True if ordered and greater than
     case FCmpInst::FCMP_OLT:    // True if ordered and less than
     case FCmpInst::FCMP_ONE:    // True if ordered and operands are unequal
-      return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
+      return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 0));
       
     case FCmpInst::FCMP_UNO:    // True if unordered: isnan(X) | isnan(Y)
     case FCmpInst::FCMP_ULT:    // True if unordered or less than
@@ -5867,7 +5873,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
     case FCmpInst::FCMP_UNE:    // True if unordered or not equal
       // Canonicalize these to be 'fcmp uno %X, 0.0'.
       I.setPredicate(FCmpInst::FCMP_UNO);
-      I.setOperand(1, Context->getNullValue(Op0->getType()));
+      I.setOperand(1, Constant::getNullValue(Op0->getType()));
       return &I;
       
     case FCmpInst::FCMP_ORD:    // True if ordered (no nans)
@@ -5876,13 +5882,13 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
     case FCmpInst::FCMP_OLE:    // True if ordered and less than or equal
       // Canonicalize these to be 'fcmp ord %X, 0.0'.
       I.setPredicate(FCmpInst::FCMP_ORD);
-      I.setOperand(1, Context->getNullValue(Op0->getType()));
+      I.setOperand(1, Constant::getNullValue(Op0->getType()));
       return &I;
     }
   }
     
   if (isa<UndefValue>(Op1))                  // fcmp pred X, undef -> undef
-    return ReplaceInstUsesWith(I, Context->getUndef(Type::Int1Ty));
+    return ReplaceInstUsesWith(I, UndefValue::get(I.getType()));
 
   // Handle fcmp with constant RHS
   if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
@@ -5890,11 +5896,11 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
     if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
       if (CFP->getValueAPF().isNaN()) {
         if (FCmpInst::isOrdered(I.getPredicate()))   // True if ordered and...
-          return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
+          return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
         assert(FCmpInst::isUnordered(I.getPredicate()) &&
                "Comparison must be either ordered or unordered!");
         // True if unordered.
-        return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
       }
     }
     
@@ -5905,7 +5911,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
         // block.  If in the same block, we're encouraging jump threading.  If
         // not, we are just pessimizing the code by making an i1 phi.
         if (LHSI->getParent() == I.getParent())
-          if (Instruction *NV = FoldOpIntoPhi(I))
+          if (Instruction *NV = FoldOpIntoPhi(I, true))
             return NV;
         break;
       case Instruction::SIToFP:
@@ -5921,18 +5927,16 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
         if (LHSI->hasOneUse()) {
           if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) {
             // Fold the known value into the constant operand.
-            Op1 = Context->getConstantExprCompare(I.getPredicate(), C, RHSC);
+            Op1 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC);
             // Insert a new FCmp of the other select operand.
-            Op2 = InsertNewInstBefore(new FCmpInst(I.getPredicate(),
-                                                      LHSI->getOperand(2), RHSC,
-                                                      I.getName()), I);
+            Op2 = Builder->CreateFCmp(I.getPredicate(),
+                                      LHSI->getOperand(2), RHSC, I.getName());
           } else if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) {
             // Fold the known value into the constant operand.
-            Op2 = Context->getConstantExprCompare(I.getPredicate(), C, RHSC);
+            Op2 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC);
             // Insert a new FCmp of the other select operand.
-            Op1 = InsertNewInstBefore(new FCmpInst(I.getPredicate(),
-                                                      LHSI->getOperand(1), RHSC,
-                                                      I.getName()), I);
+            Op1 = Builder->CreateFCmp(I.getPredicate(), LHSI->getOperand(1),
+                                      RHSC, I.getName());
           }
         }
 
@@ -5952,28 +5956,27 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
 
   // icmp X, X
   if (Op0 == Op1)
-    return ReplaceInstUsesWith(I, Context->getConstantInt(Type::Int1Ty, 
+    return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(),
                                                    I.isTrueWhenEqual()));
 
   if (isa<UndefValue>(Op1))                  // X icmp undef -> undef
-    return ReplaceInstUsesWith(I, Context->getUndef(Type::Int1Ty));
+    return ReplaceInstUsesWith(I, UndefValue::get(I.getType()));
   
   // icmp <global/alloca*/null>, <global/alloca*/null> - Global/Stack value
   // addresses never equal each other!  We already know that Op0 != Op1.
-  if ((isa<GlobalValue>(Op0) || isa<AllocaInst>(Op0) ||
+  if ((isa<GlobalValue>(Op0) || isa<AllocaInst>(Op0) || 
        isa<ConstantPointerNull>(Op0)) &&
-      (isa<GlobalValue>(Op1) || isa<AllocaInst>(Op1) ||
+      (isa<GlobalValue>(Op1) || isa<AllocaInst>(Op1) || 
        isa<ConstantPointerNull>(Op1)))
-    return ReplaceInstUsesWith(I, Context->getConstantInt(Type::Int1Ty, 
+    return ReplaceInstUsesWith(I, ConstantInt::get(Type::getInt1Ty(*Context), 
                                                    !I.isTrueWhenEqual()));
 
   // icmp's with boolean values can always be turned into bitwise operations
-  if (Ty == Type::Int1Ty) {
+  if (Ty == Type::getInt1Ty(*Context)) {
     switch (I.getPredicate()) {
-    default: assert(0 && "Invalid icmp instruction!");
+    default: llvm_unreachable("Invalid icmp instruction!");
     case ICmpInst::ICMP_EQ: {               // icmp eq i1 A, B -> ~(A^B)
-      Instruction *Xor = BinaryOperator::CreateXor(Op0, Op1, I.getName()+"tmp");
-      InsertNewInstBefore(Xor, I);
+      Value *Xor = Builder->CreateXor(Op0, Op1, I.getName()+"tmp");
       return BinaryOperator::CreateNot(Xor);
     }
     case ICmpInst::ICMP_NE:                  // icmp eq i1 A, B -> A^B
@@ -5983,32 +5986,28 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
       std::swap(Op0, Op1);                   // Change icmp ugt -> icmp ult
       // FALL THROUGH
     case ICmpInst::ICMP_ULT:{               // icmp ult i1 A, B -> ~A & B
-      Instruction *Not = BinaryOperator::CreateNot(Op0, I.getName()+"tmp");
-      InsertNewInstBefore(Not, I);
+      Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp");
       return BinaryOperator::CreateAnd(Not, Op1);
     }
     case ICmpInst::ICMP_SGT:
       std::swap(Op0, Op1);                   // Change icmp sgt -> icmp slt
       // FALL THROUGH
     case ICmpInst::ICMP_SLT: {               // icmp slt i1 A, B -> A & ~B
-      Instruction *Not = BinaryOperator::CreateNot(Op1, I.getName()+"tmp");
-      InsertNewInstBefore(Not, I);
+      Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp");
       return BinaryOperator::CreateAnd(Not, Op0);
     }
     case ICmpInst::ICMP_UGE:
       std::swap(Op0, Op1);                   // Change icmp uge -> icmp ule
       // FALL THROUGH
     case ICmpInst::ICMP_ULE: {               //  icmp ule i1 A, B -> ~A | B
-      Instruction *Not = BinaryOperator::CreateNot(Op0, I.getName()+"tmp");
-      InsertNewInstBefore(Not, I);
+      Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp");
       return BinaryOperator::CreateOr(Not, Op1);
     }
     case ICmpInst::ICMP_SGE:
       std::swap(Op0, Op1);                   // Change icmp sge -> icmp sle
       // FALL THROUGH
     case ICmpInst::ICMP_SLE: {               //  icmp sle i1 A, B -> A | ~B
-      Instruction *Not = BinaryOperator::CreateNot(Op1, I.getName()+"tmp");
-      InsertNewInstBefore(Not, I);
+      Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp");
       return BinaryOperator::CreateOr(Not, Op0);
     }
     }
@@ -6040,20 +6039,24 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
     default: break;
     case ICmpInst::ICMP_ULE:
       if (CI->isMaxValue(false))                 // A <=u MAX -> TRUE
-        return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
-      return new ICmpInst(ICmpInst::ICMP_ULT, Op0, AddOne(CI, Context));
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
+      return new ICmpInst(ICmpInst::ICMP_ULT, Op0,
+                          AddOne(CI));
     case ICmpInst::ICMP_SLE:
       if (CI->isMaxValue(true))                  // A <=s MAX -> TRUE
-        return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
-      return new ICmpInst(ICmpInst::ICMP_SLT, Op0, AddOne(CI, Context));
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
+      return new ICmpInst(ICmpInst::ICMP_SLT, Op0,
+                          AddOne(CI));
     case ICmpInst::ICMP_UGE:
       if (CI->isMinValue(false))                 // A >=u MIN -> TRUE
-        return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
-      return new ICmpInst( ICmpInst::ICMP_UGT, Op0, SubOne(CI, Context));
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
+      return new ICmpInst(ICmpInst::ICMP_UGT, Op0,
+                          SubOne(CI));
     case ICmpInst::ICMP_SGE:
       if (CI->isMinValue(true))                  // A >=s MIN -> TRUE
-        return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
-      return new ICmpInst(ICmpInst::ICMP_SGT, Op0, SubOne(CI, Context));
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
+      return new ICmpInst(ICmpInst::ICMP_SGT, Op0,
+                          SubOne(CI));
     }
     
     // If this comparison is a normal comparison, it demands all
@@ -6100,110 +6103,114 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
     // that code below can assume that Min != Max.
     if (!isa<Constant>(Op0) && Op0Min == Op0Max)
       return new ICmpInst(I.getPredicate(),
-                          Context->getConstantInt(Op0Min), Op1);
+                          ConstantInt::get(*Context, Op0Min), Op1);
     if (!isa<Constant>(Op1) && Op1Min == Op1Max)
-      return new ICmpInst(I.getPredicate(), Op0, 
-                          Context->getConstantInt(Op1Min));
+      return new ICmpInst(I.getPredicate(), Op0,
+                          ConstantInt::get(*Context, Op1Min));
 
     // Based on the range information we know about the LHS, see if we can
     // simplify this comparison.  For example, (x&4) < 8  is always true.
     switch (I.getPredicate()) {
-    default: assert(0 && "Unknown icmp opcode!");
+    default: llvm_unreachable("Unknown icmp opcode!");
     case ICmpInst::ICMP_EQ:
       if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
-        return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
+        return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
       break;
     case ICmpInst::ICMP_NE:
       if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
-        return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
       break;
     case ICmpInst::ICMP_ULT:
       if (Op0Max.ult(Op1Min))          // A <u B -> true if max(A) < min(B)
-        return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
       if (Op0Min.uge(Op1Max))          // A <u B -> false if min(A) >= max(B)
-        return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
+        return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
       if (Op1Min == Op0Max)            // A <u B -> A != B if max(A) == min(B)
         return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
       if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
         if (Op1Max == Op0Min+1)        // A <u C -> A == C-1 if min(A)+1 == C
-          return new ICmpInst(ICmpInst::ICMP_EQ, Op0, SubOne(CI, Context));
+          return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+                              SubOne(CI));
 
         // (x <u 2147483648) -> (x >s -1)  -> true if sign bit clear
         if (CI->isMinValue(true))
           return new ICmpInst(ICmpInst::ICMP_SGT, Op0,
-                           Context->getConstantIntAllOnesValue(Op0->getType()));
+                           Constant::getAllOnesValue(Op0->getType()));
       }
       break;
     case ICmpInst::ICMP_UGT:
       if (Op0Min.ugt(Op1Max))          // A >u B -> true if min(A) > max(B)
-        return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
       if (Op0Max.ule(Op1Min))          // A >u B -> false if max(A) <= max(B)
-        return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
+        return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
 
       if (Op1Max == Op0Min)            // A >u B -> A != B if min(A) == max(B)
         return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
       if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
         if (Op1Min == Op0Max-1)        // A >u C -> A == C+1 if max(a)-1 == C
-          return new ICmpInst(ICmpInst::ICMP_EQ, Op0, AddOne(CI, Context));
+          return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+                              AddOne(CI));
 
         // (x >u 2147483647) -> (x <s 0)  -> true if sign bit set
         if (CI->isMaxValue(true))
           return new ICmpInst(ICmpInst::ICMP_SLT, Op0,
-                              Context->getNullValue(Op0->getType()));
+                              Constant::getNullValue(Op0->getType()));
       }
       break;
     case ICmpInst::ICMP_SLT:
       if (Op0Max.slt(Op1Min))          // A <s B -> true if max(A) < min(C)
-        return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
       if (Op0Min.sge(Op1Max))          // A <s B -> false if min(A) >= max(C)
-        return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
+        return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
       if (Op1Min == Op0Max)            // A <s B -> A != B if max(A) == min(B)
         return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
       if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
         if (Op1Max == Op0Min+1)        // A <s C -> A == C-1 if min(A)+1 == C
-          return new ICmpInst(ICmpInst::ICMP_EQ, Op0, SubOne(CI, Context));
+          return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+                              SubOne(CI));
       }
       break;
     case ICmpInst::ICMP_SGT:
       if (Op0Min.sgt(Op1Max))          // A >s B -> true if min(A) > max(B)
-        return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
       if (Op0Max.sle(Op1Min))          // A >s B -> false if max(A) <= min(B)
-        return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
+        return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
 
       if (Op1Max == Op0Min)            // A >s B -> A != B if min(A) == max(B)
         return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
       if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
         if (Op1Min == Op0Max-1)        // A >s C -> A == C+1 if max(A)-1 == C
-          return new ICmpInst(ICmpInst::ICMP_EQ, Op0, AddOne(CI, Context));
+          return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+                              AddOne(CI));
       }
       break;
     case ICmpInst::ICMP_SGE:
       assert(!isa<ConstantInt>(Op1) && "ICMP_SGE with ConstantInt not folded!");
       if (Op0Min.sge(Op1Max))          // A >=s B -> true if min(A) >= max(B)
-        return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
       if (Op0Max.slt(Op1Min))          // A >=s B -> false if max(A) < min(B)
-        return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
+        return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
       break;
     case ICmpInst::ICMP_SLE:
       assert(!isa<ConstantInt>(Op1) && "ICMP_SLE with ConstantInt not folded!");
       if (Op0Max.sle(Op1Min))          // A <=s B -> true if max(A) <= min(B)
-        return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
       if (Op0Min.sgt(Op1Max))          // A <=s B -> false if min(A) > max(B)
-        return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
+        return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
       break;
     case ICmpInst::ICMP_UGE:
       assert(!isa<ConstantInt>(Op1) && "ICMP_UGE with ConstantInt not folded!");
       if (Op0Min.uge(Op1Max))          // A >=u B -> true if min(A) >= max(B)
-        return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
       if (Op0Max.ult(Op1Min))          // A >=u B -> false if max(A) < min(B)
-        return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
+        return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
       break;
     case ICmpInst::ICMP_ULE:
       assert(!isa<ConstantInt>(Op1) && "ICMP_ULE with ConstantInt not folded!");
       if (Op0Max.ule(Op1Min))          // A <=u B -> true if max(A) <= min(B)
-        return ReplaceInstUsesWith(I, Context->getConstantIntTrue());
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
       if (Op0Min.ugt(Op1Max))          // A <=u B -> false if min(A) > max(B)
-        return ReplaceInstUsesWith(I, Context->getConstantIntFalse());
+        return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
       break;
     }
 
@@ -6255,16 +6262,16 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
             }
           if (isAllZeros)
             return new ICmpInst(I.getPredicate(), LHSI->getOperand(0),
-                    Context->getNullValue(LHSI->getOperand(0)->getType()));
+                    Constant::getNullValue(LHSI->getOperand(0)->getType()));
         }
         break;
 
       case Instruction::PHI:
-        // Only fold icmp into the PHI if the phi and fcmp are in the same
+        // Only fold icmp into the PHI if the phi and icmp are in the same
         // block.  If in the same block, we're encouraging jump threading.  If
         // not, we are just pessimizing the code by making an i1 phi.
         if (LHSI->getParent() == I.getParent())
-          if (Instruction *NV = FoldOpIntoPhi(I))
+          if (Instruction *NV = FoldOpIntoPhi(I, true))
             return NV;
         break;
       case Instruction::Select: {
@@ -6275,18 +6282,16 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
         if (LHSI->hasOneUse()) {
           if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) {
             // Fold the known value into the constant operand.
-            Op1 = Context->getConstantExprICmp(I.getPredicate(), C, RHSC);
+            Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
             // Insert a new ICmp of the other select operand.
-            Op2 = InsertNewInstBefore(new ICmpInst(I.getPredicate(),
-                                                   LHSI->getOperand(2), RHSC,
-                                                   I.getName()), I);
+            Op2 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(2),
+                                      RHSC, I.getName());
           } else if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) {
             // Fold the known value into the constant operand.
-            Op2 = Context->getConstantExprICmp(I.getPredicate(), C, RHSC);
+            Op2 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
             // Insert a new ICmp of the other select operand.
-            Op1 = InsertNewInstBefore(new ICmpInst(I.getPredicate(),
-                                                   LHSI->getOperand(1), RHSC,
-                                                   I.getName()), I);
+            Op1 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(1),
+                                      RHSC, I.getName());
           }
         }
 
@@ -6298,19 +6303,31 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
         // If we have (malloc != null), and if the malloc has a single use, we
         // can assume it is successful and remove the malloc.
         if (LHSI->hasOneUse() && isa<ConstantPointerNull>(RHSC)) {
-          AddToWorkList(LHSI);
-          return ReplaceInstUsesWith(I, Context->getConstantInt(Type::Int1Ty,
-                                                         !I.isTrueWhenEqual()));
+          Worklist.Add(LHSI);
+          return ReplaceInstUsesWith(I,
+                                     ConstantInt::get(Type::getInt1Ty(*Context),
+                                                      !I.isTrueWhenEqual()));
+        }
+        break;
+      case Instruction::Call:
+        // If we have (malloc != null), and if the malloc has a single use, we
+        // can assume it is successful and remove the malloc.
+        if (isMalloc(LHSI) && LHSI->hasOneUse() &&
+            isa<ConstantPointerNull>(RHSC)) {
+          Worklist.Add(LHSI);
+          return ReplaceInstUsesWith(I,
+                                     ConstantInt::get(Type::getInt1Ty(*Context),
+                                                      !I.isTrueWhenEqual()));
         }
         break;
       }
   }
 
   // If we can optimize a 'icmp GEP, P' or 'icmp P, GEP', do so now.
-  if (User *GEP = dyn_castGetElementPtr(Op0))
+  if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op0))
     if (Instruction *NI = FoldGEPICmp(GEP, Op1, I.getPredicate(), I))
       return NI;
-  if (User *GEP = dyn_castGetElementPtr(Op1))
+  if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op1))
     if (Instruction *NI = FoldGEPICmp(GEP, Op0,
                            ICmpInst::getSwappedPredicate(I.getPredicate()), I))
       return NI;
@@ -6333,10 +6350,10 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
       // If Op1 is a constant, we can fold the cast into the constant.
       if (Op0->getType() != Op1->getType()) {
         if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
-          Op1 = Context->getConstantExprBitCast(Op1C, Op0->getType());
+          Op1 = ConstantExpr::getBitCast(Op1C, Op0->getType());
         } else {
           // Otherwise, cast the RHS right before the icmp
-          Op1 = InsertBitCastBefore(Op1, Op0->getType(), I);
+          Op1 = Builder->CreateBitCast(Op1, Op0->getType());
         }
       }
       return new ICmpInst(I.getPredicate(), Op0, Op1);
@@ -6397,16 +6414,12 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
             // Mask = -1 >> count-trailing-zeros(Cst).
             if (!CI->isZero() && !CI->isOne()) {
               const APInt &AP = CI->getValue();
-              ConstantInt *Mask = Context->getConstantInt(
+              ConstantInt *Mask = ConstantInt::get(*Context, 
                                       APInt::getLowBitsSet(AP.getBitWidth(),
                                                            AP.getBitWidth() -
                                                       AP.countTrailingZeros()));
-              Instruction *And1 = BinaryOperator::CreateAnd(Op0I->getOperand(0),
-                                                            Mask);
-              Instruction *And2 = BinaryOperator::CreateAnd(Op1I->getOperand(0),
-                                                            Mask);
-              InsertNewInstBefore(And1, I);
-              InsertNewInstBefore(And2, I);
+              Value *And1 = Builder->CreateAnd(Op0I->getOperand(0), Mask);
+              Value *And2 = Builder->CreateAnd(Op1I->getOperand(0), Mask);
               return new ICmpInst(I.getPredicate(), And1, And2);
             }
           }
@@ -6435,7 +6448,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
       if (A == Op1 || B == Op1) {    // (A^B) == A  ->  B == 0
         Value *OtherVal = A == Op1 ? B : A;
         return new ICmpInst(I.getPredicate(), OtherVal,
-                            Context->getNullValue(A->getType()));
+                            Constant::getNullValue(A->getType()));
       }
 
       if (match(Op1, m_Xor(m_Value(C), m_Value(D)))) {
@@ -6444,10 +6457,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
         if (match(B, m_ConstantInt(C1)) &&
             match(D, m_ConstantInt(C2)) && Op1->hasOneUse()) {
           Constant *NC = 
-                       Context->getConstantInt(C1->getValue() ^ C2->getValue());
-          Instruction *Xor = BinaryOperator::CreateXor(C, NC, "tmp");
-          return new ICmpInst(I.getPredicate(), A,
-                              InsertNewInstBefore(Xor, I));
+                   ConstantInt::get(*Context, C1->getValue() ^ C2->getValue());
+          Value *Xor = Builder->CreateXor(C, NC, "tmp");
+          return new ICmpInst(I.getPredicate(), A, Xor);
         }
         
         // A^B == A^D -> B == D
@@ -6463,18 +6475,18 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
       // A == (A^B)  ->  B == 0
       Value *OtherVal = A == Op0 ? B : A;
       return new ICmpInst(I.getPredicate(), OtherVal,
-                          Context->getNullValue(A->getType()));
+                          Constant::getNullValue(A->getType()));
     }
 
     // (A-B) == A  ->  B == 0
     if (match(Op0, m_Sub(m_Specific(Op1), m_Value(B))))
       return new ICmpInst(I.getPredicate(), B, 
-                          Context->getNullValue(B->getType()));
+                          Constant::getNullValue(B->getType()));
 
     // A == (A-B)  ->  B == 0
     if (match(Op1, m_Sub(m_Specific(Op0), m_Value(B))))
       return new ICmpInst(I.getPredicate(), B,
-                          Context->getNullValue(B->getType()));
+                          Constant::getNullValue(B->getType()));
     
     // (X&Z) == (Y&Z) -> (X^Y) & Z == 0
     if (Op0->hasOneUse() && Op1->hasOneUse() &&
@@ -6493,10 +6505,10 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
       }
       
       if (X) {   // Build (X^Y) & Z
-        Op1 = InsertNewInstBefore(BinaryOperator::CreateXor(X, Y, "tmp"), I);
-        Op1 = InsertNewInstBefore(BinaryOperator::CreateAnd(Op1, Z, "tmp"), I);
+        Op1 = Builder->CreateXor(X, Y, "tmp");
+        Op1 = Builder->CreateAnd(Op1, Z, "tmp");
         I.setOperand(0, Op1);
-        I.setOperand(1, Context->getNullValue(Op1->getType()));
+        I.setOperand(1, Constant::getNullValue(Op1->getType()));
         return &I;
       }
     }
@@ -6535,13 +6547,13 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
   // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and 
   // C2 (CI). By solving for X we can turn this into a range check 
   // instead of computing a divide. 
-  Constant *Prod = Context->getConstantExprMul(CmpRHS, DivRHS);
+  Constant *Prod = ConstantExpr::getMul(CmpRHS, DivRHS);
 
   // Determine if the product overflows by seeing if the product is
   // not equal to the divide. Make sure we do the same kind of divide
   // as in the LHS instruction that we're folding. 
-  bool ProdOV = (DivIsSigned ? Context->getConstantExprSDiv(Prod, DivRHS) :
-                 Context->getConstantExprUDiv(Prod, DivRHS)) != CmpRHS;
+  bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) :
+                 ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS;
 
   // Get the ICmp opcode
   ICmpInst::Predicate Pred = ICI.getPredicate();
@@ -6565,8 +6577,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
   } else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0.
     if (CmpRHSV == 0) {       // (X / pos) op 0
       // Can't overflow.  e.g.  X/2 op 0 --> [-1, 2)
-      LoBound = cast<ConstantInt>(Context->getConstantExprNeg(SubOne(DivRHS, 
-                                                                    Context)));
+      LoBound = cast<ConstantInt>(ConstantExpr::getNeg(SubOne(DivRHS)));
       HiBound = DivRHS;
     } else if (CmpRHSV.isStrictlyPositive()) {   // (X / pos) op pos
       LoBound = Prod;     // e.g.   X/5 op 3 --> [15, 20)
@@ -6575,11 +6586,11 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
         HiOverflow = AddWithOverflow(HiBound, Prod, DivRHS, Context, true);
     } else {                       // (X / pos) op neg
       // e.g. X/5 op -3  --> [-15-4, -15+1) --> [-19, -14)
-      HiBound = AddOne(Prod, Context);
+      HiBound = AddOne(Prod);
       LoOverflow = HiOverflow = ProdOV ? -1 : 0;
       if (!LoOverflow) {
         ConstantInt* DivNeg =
-                         cast<ConstantInt>(Context->getConstantExprNeg(DivRHS));
+                         cast<ConstantInt>(ConstantExpr::getNeg(DivRHS));
         LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg, Context,
                                      true) ? -1 : 0;
        }
@@ -6587,15 +6598,15 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
   } else if (DivRHS->getValue().isNegative()) { // Divisor is < 0.
     if (CmpRHSV == 0) {       // (X / neg) op 0
       // e.g. X/-5 op 0  --> [-4, 5)
-      LoBound = AddOne(DivRHS, Context);
-      HiBound = cast<ConstantInt>(Context->getConstantExprNeg(DivRHS));
+      LoBound = AddOne(DivRHS);
+      HiBound = cast<ConstantInt>(ConstantExpr::getNeg(DivRHS));
       if (HiBound == DivRHS) {     // -INTMIN = INTMIN
         HiOverflow = 1;            // [INTMIN+1, overflow)
         HiBound = 0;               // e.g. X/INTMIN = 0 --> X > INTMIN
       }
     } else if (CmpRHSV.isStrictlyPositive()) {   // (X / neg) op pos
       // e.g. X/-5 op 3  --> [-19, -14)
-      HiBound = AddOne(Prod, Context);
+      HiBound = AddOne(Prod);
       HiOverflow = LoOverflow = ProdOV ? -1 : 0;
       if (!LoOverflow)
         LoOverflow = AddWithOverflow(LoBound, HiBound,
@@ -6613,42 +6624,42 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
 
   Value *X = DivI->getOperand(0);
   switch (Pred) {
-  default: assert(0 && "Unhandled icmp opcode!");
+  default: llvm_unreachable("Unhandled icmp opcode!");
   case ICmpInst::ICMP_EQ:
     if (LoOverflow && HiOverflow)
-      return ReplaceInstUsesWith(ICI, Context->getConstantIntFalse());
+      return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context));
     else if (HiOverflow)
-      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : 
+      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
                           ICmpInst::ICMP_UGE, X, LoBound);
     else if (LoOverflow)
-      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : 
+      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
                           ICmpInst::ICMP_ULT, X, HiBound);
     else
       return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, true, ICI);
   case ICmpInst::ICMP_NE:
     if (LoOverflow && HiOverflow)
-      return ReplaceInstUsesWith(ICI, Context->getConstantIntTrue());
+      return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context));
     else if (HiOverflow)
-      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : 
+      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
                           ICmpInst::ICMP_ULT, X, LoBound);
     else if (LoOverflow)
-      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : 
+      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
                           ICmpInst::ICMP_UGE, X, HiBound);
     else
       return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, false, ICI);
   case ICmpInst::ICMP_ULT:
   case ICmpInst::ICMP_SLT:
     if (LoOverflow == +1)   // Low bound is greater than input range.
-      return ReplaceInstUsesWith(ICI, Context->getConstantIntTrue());
+      return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context));
     if (LoOverflow == -1)   // Low bound is less than input range.
-      return ReplaceInstUsesWith(ICI, Context->getConstantIntFalse());
+      return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context));
     return new ICmpInst(Pred, X, LoBound);
   case ICmpInst::ICMP_UGT:
   case ICmpInst::ICMP_SGT:
     if (HiOverflow == +1)       // High bound greater than input range.
-      return ReplaceInstUsesWith(ICI, Context->getConstantIntFalse());
+      return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context));
     else if (HiOverflow == -1)  // High bound less than input range.
-      return ReplaceInstUsesWith(ICI, Context->getConstantIntTrue());
+      return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context));
     if (Pred == ICmpInst::ICMP_UGT)
       return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound);
     else
@@ -6682,7 +6693,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
         NewRHS.zext(SrcBits);
         NewRHS |= KnownOne;
         return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
-                            Context->getConstantInt(NewRHS));
+                            ConstantInt::get(*Context, NewRHS));
       }
     }
     break;
@@ -6699,7 +6710,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
         // the operation, just stop using the Xor.
         if (!XorCST->getValue().isNegative()) {
           ICI.setOperand(0, CompareVal);
-          AddToWorkList(LHSI);
+          Worklist.Add(LHSI);
           return &ICI;
         }
         
@@ -6711,10 +6722,10 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
         
         if (isTrueIfPositive)
           return new ICmpInst(ICmpInst::ICMP_SGT, CompareVal,
-                              SubOne(RHS, Context));
+                              SubOne(RHS));
         else
           return new ICmpInst(ICmpInst::ICMP_SLT, CompareVal,
-                              AddOne(RHS, Context));
+                              AddOne(RHS));
       }
 
       if (LHSI->hasOneUse()) {
@@ -6725,7 +6736,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
                                          ? ICI.getUnsignedPredicate()
                                          : ICI.getSignedPredicate();
           return new ICmpInst(Pred, LHSI->getOperand(0),
-                              Context->getConstantInt(RHSV ^ SignBit));
+                              ConstantInt::get(*Context, RHSV ^ SignBit));
         }
 
         // (icmp u/s (xor A ~SignBit), C) -> (icmp s/u (xor C ~SignBit), A)
@@ -6736,7 +6747,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
                                          : ICI.getSignedPredicate();
           Pred = ICI.getSwappedPredicate(Pred);
           return new ICmpInst(Pred, LHSI->getOperand(0),
-                              Context->getConstantInt(RHSV ^ NotSignBit));
+                              ConstantInt::get(*Context, RHSV ^ NotSignBit));
         }
       }
     }
@@ -6763,12 +6774,11 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
           NewCST.zext(BitWidth);
           APInt NewCI = RHSV;
           NewCI.zext(BitWidth);
-          Instruction *NewAnd = 
-            BinaryOperator::CreateAnd(Cast->getOperand(0),
-                               Context->getConstantInt(NewCST),LHSI->getName());
-          InsertNewInstBefore(NewAnd, ICI);
+          Value *NewAnd = 
+            Builder->CreateAnd(Cast->getOperand(0),
+                           ConstantInt::get(*Context, NewCST), LHSI->getName());
           return new ICmpInst(ICI.getPredicate(), NewAnd,
-                              Context->getConstantInt(NewCI));
+                              ConstantInt::get(*Context, NewCI));
         }
       }
       
@@ -6805,32 +6815,31 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
         if (CanFold) {
           Constant *NewCst;
           if (Shift->getOpcode() == Instruction::Shl)
-            NewCst = Context->getConstantExprLShr(RHS, ShAmt);
+            NewCst = ConstantExpr::getLShr(RHS, ShAmt);
           else
-            NewCst = Context->getConstantExprShl(RHS, ShAmt);
+            NewCst = ConstantExpr::getShl(RHS, ShAmt);
           
           // Check to see if we are shifting out any of the bits being
           // compared.
-          if (Context->getConstantExpr(Shift->getOpcode(),
+          if (ConstantExpr::get(Shift->getOpcode(),
                                        NewCst, ShAmt) != RHS) {
             // If we shifted bits out, the fold is not going to work out.
             // As a special case, check to see if this means that the
             // result is always true or false now.
             if (ICI.getPredicate() == ICmpInst::ICMP_EQ)
-              return ReplaceInstUsesWith(ICI, Context->getConstantIntFalse());
+              return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context));
             if (ICI.getPredicate() == ICmpInst::ICMP_NE)
-              return ReplaceInstUsesWith(ICI, Context->getConstantIntTrue());
+              return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context));
           } else {
             ICI.setOperand(1, NewCst);
             Constant *NewAndCST;
             if (Shift->getOpcode() == Instruction::Shl)
-              NewAndCST = Context->getConstantExprLShr(AndCST, ShAmt);
+              NewAndCST = ConstantExpr::getLShr(AndCST, ShAmt);
             else
-              NewAndCST = Context->getConstantExprShl(AndCST, ShAmt);
+              NewAndCST = ConstantExpr::getShl(AndCST, ShAmt);
             LHSI->setOperand(1, NewAndCST);
             LHSI->setOperand(0, Shift->getOperand(0));
-            AddToWorkList(Shift); // Shift is dead.
-            AddUsesToWorkList(ICI);
+            Worklist.Add(Shift); // Shift is dead.
             return &ICI;
           }
         }
@@ -6845,19 +6854,15 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
         // Compute C << Y.
         Value *NS;
         if (Shift->getOpcode() == Instruction::LShr) {
-          NS = BinaryOperator::CreateShl(AndCST, 
-                                         Shift->getOperand(1), "tmp");
+          NS = Builder->CreateShl(AndCST, Shift->getOperand(1), "tmp");
         } else {
           // Insert a logical shift.
-          NS = BinaryOperator::CreateLShr(AndCST,
-                                          Shift->getOperand(1), "tmp");
+          NS = Builder->CreateLShr(AndCST, Shift->getOperand(1), "tmp");
         }
-        InsertNewInstBefore(cast<Instruction>(NS), ICI);
         
         // Compute X & (C << Y).
-        Instruction *NewAnd = 
-          BinaryOperator::CreateAnd(Shift->getOperand(0), NS, LHSI->getName());
-        InsertNewInstBefore(NewAnd, ICI);
+        Value *NewAnd = 
+          Builder->CreateAnd(Shift->getOperand(0), NS, LHSI->getName());
         
         ICI.setOperand(0, NewAnd);
         return &ICI;
@@ -6881,11 +6886,11 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
       // If we are comparing against bits always shifted out, the
       // comparison cannot succeed.
       Constant *Comp =
-        Context->getConstantExprShl(Context->getConstantExprLShr(RHS, ShAmt),
+        ConstantExpr::getShl(ConstantExpr::getLShr(RHS, ShAmt),
                                                                  ShAmt);
       if (Comp != RHS) {// Comparing against a bit that we know is zero.
         bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
-        Constant *Cst = Context->getConstantInt(Type::Int1Ty, IsICMP_NE);
+        Constant *Cst = ConstantInt::get(Type::getInt1Ty(*Context), IsICMP_NE);
         return ReplaceInstUsesWith(ICI, Cst);
       }
       
@@ -6893,15 +6898,13 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
         // Otherwise strength reduce the shift into an and.
         uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
         Constant *Mask =
-          Context->getConstantInt(APInt::getLowBitsSet(TypeBits, 
+          ConstantInt::get(*Context, APInt::getLowBitsSet(TypeBits, 
                                                        TypeBits-ShAmtVal));
         
-        Instruction *AndI =
-          BinaryOperator::CreateAnd(LHSI->getOperand(0),
-                                    Mask, LHSI->getName()+".mask");
-        Value *And = InsertNewInstBefore(AndI, ICI);
+        Value *And =
+          Builder->CreateAnd(LHSI->getOperand(0),Mask, LHSI->getName()+".mask");
         return new ICmpInst(ICI.getPredicate(), And,
-                            Context->getConstantInt(RHSV.lshr(ShAmtVal)));
+                            ConstantInt::get(*Context, RHSV.lshr(ShAmtVal)));
       }
     }
     
@@ -6910,15 +6913,12 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
     if (LHSI->hasOneUse() &&
         isSignBitCheck(ICI.getPredicate(), RHS, TrueIfSigned)) {
       // (X << 31) <s 0  --> (X&1) != 0
-      Constant *Mask = Context->getConstantInt(APInt(TypeBits, 1) <<
+      Constant *Mask = ConstantInt::get(*Context, APInt(TypeBits, 1) <<
                                            (TypeBits-ShAmt->getZExtValue()-1));
-      Instruction *AndI =
-        BinaryOperator::CreateAnd(LHSI->getOperand(0),
-                                  Mask, LHSI->getName()+".mask");
-      Value *And = InsertNewInstBefore(AndI, ICI);
-      
+      Value *And =
+        Builder->CreateAnd(LHSI->getOperand(0), Mask, LHSI->getName()+".mask");
       return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ,
-                          And, Context->getNullValue(And->getType()));
+                          And, Constant::getNullValue(And->getType()));
     }
     break;
   }
@@ -6948,7 +6948,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
     
     if (Comp != RHSV) { // Comparing against a bit that we know is zero.
       bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
-      Constant *Cst = Context->getConstantInt(Type::Int1Ty, IsICMP_NE);
+      Constant *Cst = ConstantInt::get(Type::getInt1Ty(*Context), IsICMP_NE);
       return ReplaceInstUsesWith(ICI, Cst);
     }
     
@@ -6959,20 +6959,18 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
         MaskedValueIsZero(LHSI->getOperand(0), 
                           APInt::getLowBitsSet(Comp.getBitWidth(), ShAmtVal))) {
       return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
-                          Context->getConstantExprShl(RHS, ShAmt));
+                          ConstantExpr::getShl(RHS, ShAmt));
     }
       
     if (LHSI->hasOneUse()) {
       // Otherwise strength reduce the shift into an and.
       APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
-      Constant *Mask = Context->getConstantInt(Val);
+      Constant *Mask = ConstantInt::get(*Context, Val);
       
-      Instruction *AndI =
-        BinaryOperator::CreateAnd(LHSI->getOperand(0),
-                                  Mask, LHSI->getName()+".mask");
-      Value *And = InsertNewInstBefore(AndI, ICI);
+      Value *And = Builder->CreateAnd(LHSI->getOperand(0),
+                                      Mask, LHSI->getName()+".mask");
       return new ICmpInst(ICI.getPredicate(), And,
-                          Context->getConstantExprShl(RHS, ShAmt));
+                          ConstantExpr::getShl(RHS, ShAmt));
     }
     break;
   }
@@ -7005,18 +7003,18 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
       if (ICI.isSignedPredicate()) {
         if (CR.getLower().isSignBit()) {
           return new ICmpInst(ICmpInst::ICMP_SLT, LHSI->getOperand(0),
-                              Context->getConstantInt(CR.getUpper()));
+                              ConstantInt::get(*Context, CR.getUpper()));
         } else if (CR.getUpper().isSignBit()) {
           return new ICmpInst(ICmpInst::ICMP_SGE, LHSI->getOperand(0),
-                              Context->getConstantInt(CR.getLower()));
+                              ConstantInt::get(*Context, CR.getLower()));
         }
       } else {
         if (CR.getLower().isMinValue()) {
           return new ICmpInst(ICmpInst::ICMP_ULT, LHSI->getOperand(0),
-                              Context->getConstantInt(CR.getUpper()));
+                              ConstantInt::get(*Context, CR.getUpper()));
         } else if (CR.getUpper().isMinValue()) {
           return new ICmpInst(ICmpInst::ICMP_UGE, LHSI->getOperand(0),
-                              Context->getConstantInt(CR.getLower()));
+                              ConstantInt::get(*Context, CR.getLower()));
         }
       }
     }
@@ -7036,12 +7034,11 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
         if (RHSV == 0 && isa<ConstantInt>(BO->getOperand(1)) &&BO->hasOneUse()){
           const APInt &V = cast<ConstantInt>(BO->getOperand(1))->getValue();
           if (V.sgt(APInt(V.getBitWidth(), 1)) && V.isPowerOf2()) {
-            Instruction *NewRem =
-              BinaryOperator::CreateURem(BO->getOperand(0), BO->getOperand(1),
-                                         BO->getName());
-            InsertNewInstBefore(NewRem, ICI);
-            return new ICmpInst(ICI.getPredicate(), NewRem, 
-                                Context->getNullValue(BO->getType()));
+            Value *NewRem =
+              Builder->CreateURem(BO->getOperand(0), BO->getOperand(1),
+                                  BO->getName());
+            return new ICmpInst(ICI.getPredicate(), NewRem,
+                                Constant::getNullValue(BO->getType()));
           }
         }
         break;
@@ -7050,19 +7047,18 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
         if (ConstantInt *BOp1C = dyn_cast<ConstantInt>(BO->getOperand(1))) {
           if (BO->hasOneUse())
             return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
-                                Context->getConstantExprSub(RHS, BOp1C));
+                                ConstantExpr::getSub(RHS, BOp1C));
         } else if (RHSV == 0) {
           // Replace ((add A, B) != 0) with (A != -B) if A or B is
           // efficiently invertible, or if the add has just this one use.
           Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1);
           
-          if (Value *NegVal = dyn_castNegVal(BOp1, Context))
+          if (Value *NegVal = dyn_castNegVal(BOp1))
             return new ICmpInst(ICI.getPredicate(), BOp0, NegVal);
-          else if (Value *NegVal = dyn_castNegVal(BOp0, Context))
+          else if (Value *NegVal = dyn_castNegVal(BOp0))
             return new ICmpInst(ICI.getPredicate(), NegVal, BOp1);
           else if (BO->hasOneUse()) {
-            Instruction *Neg = BinaryOperator::CreateNeg(BOp1);
-            InsertNewInstBefore(Neg, ICI);
+            Value *Neg = Builder->CreateNeg(BOp1);
             Neg->takeName(BO);
             return new ICmpInst(ICI.getPredicate(), BOp0, Neg);
           }
@@ -7073,7 +7069,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
         // the explicit xor.
         if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1)))
           return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), 
-                              Context->getConstantExprXor(RHS, BOC));
+                              ConstantExpr::getXor(RHS, BOC));
         
         // FALLTHROUGH
       case Instruction::Sub:
@@ -7087,10 +7083,10 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
         // If bits are being or'd in that are not present in the constant we
         // are comparing against, then the comparison could never succeed!
         if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1))) {
-          Constant *NotCI = Context->getConstantExprNot(RHS);
-          if (!Context->getConstantExprAnd(BOC, NotCI)->isNullValue())
+          Constant *NotCI = ConstantExpr::getNot(RHS);
+          if (!ConstantExpr::getAnd(BOC, NotCI)->isNullValue())
             return ReplaceInstUsesWith(ICI,
-                                       Context->getConstantInt(Type::Int1Ty, 
+                                       ConstantInt::get(Type::getInt1Ty(*Context), 
                                        isICMP_NE));
         }
         break;
@@ -7101,19 +7097,19 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
           // comparison can never succeed!
           if ((RHSV & ~BOC->getValue()) != 0)
             return ReplaceInstUsesWith(ICI,
-                                       Context->getConstantInt(Type::Int1Ty,
+                                       ConstantInt::get(Type::getInt1Ty(*Context),
                                        isICMP_NE));
           
           // If we have ((X & C) == C), turn it into ((X & C) != 0).
           if (RHS == BOC && RHSV.isPowerOf2())
             return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ :
                                 ICmpInst::ICMP_NE, LHSI,
-                                Context->getNullValue(RHS->getType()));
+                                Constant::getNullValue(RHS->getType()));
           
           // Replace (and X, (1 << size(X)-1) != 0) with x s< 0
           if (BOC->getValue().isSignBit()) {
             Value *X = BO->getOperand(0);
-            Constant *Zero = Context->getNullValue(X->getType());
+            Constant *Zero = Constant::getNullValue(X->getType());
             ICmpInst::Predicate pred = isICMP_NE ? 
               ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE;
             return new ICmpInst(pred, X, Zero);
@@ -7122,7 +7118,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
           // ((X & ~7) == 0) --> X < 8
           if (RHSV == 0 && isHighOnes(BOC)) {
             Value *X = BO->getOperand(0);
-            Constant *NegX = Context->getConstantExprNeg(BOC);
+            Constant *NegX = ConstantExpr::getNeg(BOC);
             ICmpInst::Predicate pred = isICMP_NE ? 
               ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
             return new ICmpInst(pred, X, NegX);
@@ -7133,9 +7129,9 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
     } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(LHSI)) {
       // Handle icmp {eq|ne} <intrinsic>, intcst.
       if (II->getIntrinsicID() == Intrinsic::bswap) {
-        AddToWorkList(II);
+        Worklist.Add(II);
         ICI.setOperand(0, II->getOperand(1));
-        ICI.setOperand(1, Context->getConstantInt(RHSV.byteSwap()));
+        ICI.setOperand(1, ConstantInt::get(*Context, RHSV.byteSwap()));
         return &ICI;
       }
     }
@@ -7155,17 +7151,17 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
 
   // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the 
   // integer type is the same size as the pointer type.
-  if (LHSCI->getOpcode() == Instruction::PtrToInt &&
-      getTargetData().getPointerSizeInBits() == 
+  if (TD && LHSCI->getOpcode() == Instruction::PtrToInt &&
+      TD->getPointerSizeInBits() ==
          cast<IntegerType>(DestTy)->getBitWidth()) {
     Value *RHSOp = 0;
     if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) {
-      RHSOp = Context->getConstantExprIntToPtr(RHSC, SrcTy);
+      RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy);
     } else if (PtrToIntInst *RHSC = dyn_cast<PtrToIntInst>(ICI.getOperand(1))) {
       RHSOp = RHSC->getOperand(0);
       // If the pointer types don't match, insert a bitcast.
       if (LHSCIOp->getType() != RHSOp->getType())
-        RHSOp = InsertBitCastBefore(RHSOp, LHSCIOp->getType(), ICI);
+        RHSOp = Builder->CreateBitCast(RHSOp, LHSCIOp->getType());
     }
 
     if (RHSOp)
@@ -7212,8 +7208,8 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
 
   // Compute the constant that would happen if we truncated to SrcTy then
   // reextended to DestTy.
-  Constant *Res1 = Context->getConstantExprTrunc(CI, SrcTy);
-  Constant *Res2 = Context->getConstantExprCast(LHSCI->getOpcode(),
+  Constant *Res1 = ConstantExpr::getTrunc(CI, SrcTy);
+  Constant *Res2 = ConstantExpr::getCast(LHSCI->getOpcode(),
                                                 Res1, DestTy);
 
   // If the re-extended constant didn't change...
@@ -7239,9 +7235,9 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
   // First, handle some easy cases. We know the result cannot be equal at this
   // point so handle the ICI.isEquality() cases
   if (ICI.getPredicate() == ICmpInst::ICMP_EQ)
-    return ReplaceInstUsesWith(ICI, Context->getConstantIntFalse());
+    return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context));
   if (ICI.getPredicate() == ICmpInst::ICMP_NE)
-    return ReplaceInstUsesWith(ICI, Context->getConstantIntTrue());
+    return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context));
 
   // Evaluate the comparison for LT (we invert for GT below). LE and GE cases
   // should have been folded away previously and not enter in here.
@@ -7249,20 +7245,19 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
   if (isSignedCmp) {
     // We're performing a signed comparison.
     if (cast<ConstantInt>(CI)->getValue().isNegative())
-      Result = Context->getConstantIntFalse();          // X < (small) --> false
+      Result = ConstantInt::getFalse(*Context);          // X < (small) --> false
     else
-      Result = Context->getConstantIntTrue();           // X < (large) --> true
+      Result = ConstantInt::getTrue(*Context);           // X < (large) --> true
   } else {
     // We're performing an unsigned comparison.
     if (isSignedExt) {
       // We're performing an unsigned comp with a sign extended value.
       // This is true if the input is >= 0. [aka >s -1]
-      Constant *NegOne = Context->getConstantIntAllOnesValue(SrcTy);
-      Result = InsertNewInstBefore(new ICmpInst(ICmpInst::ICMP_SGT, LHSCIOp,
-                                   NegOne, ICI.getName()), ICI);
+      Constant *NegOne = Constant::getAllOnesValue(SrcTy);
+      Result = Builder->CreateICmpSGT(LHSCIOp, NegOne, ICI.getName());
     } else {
       // Unsigned extend & unsigned compare -> always true.
-      Result = Context->getConstantIntTrue();
+      Result = ConstantInt::getTrue(*Context);
     }
   }
 
@@ -7275,7 +7270,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
           ICI.getPredicate()==ICmpInst::ICMP_SGT) &&
          "ICmp should be folded!");
   if (Constant *CI = dyn_cast<Constant>(Result))
-    return ReplaceInstUsesWith(ICI, Context->getConstantExprNot(CI));
+    return ReplaceInstUsesWith(ICI, ConstantExpr::getNot(CI));
   return BinaryOperator::CreateNot(Result);
 }
 
@@ -7317,21 +7312,21 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
 
   // shl X, 0 == X and shr X, 0 == X
   // shl 0, X == 0 and shr 0, X == 0
-  if (Op1 == Context->getNullValue(Op1->getType()) ||
-      Op0 == Context->getNullValue(Op0->getType()))
+  if (Op1 == Constant::getNullValue(Op1->getType()) ||
+      Op0 == Constant::getNullValue(Op0->getType()))
     return ReplaceInstUsesWith(I, Op0);
   
   if (isa<UndefValue>(Op0)) {            
     if (I.getOpcode() == Instruction::AShr) // undef >>s X -> undef
       return ReplaceInstUsesWith(I, Op0);
     else                                    // undef << X -> 0, undef >>u X -> 0
-      return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
+      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
   }
   if (isa<UndefValue>(Op1)) {
     if (I.getOpcode() == Instruction::AShr)  // X >>s undef -> X
       return ReplaceInstUsesWith(I, Op0);          
     else                                     // X << undef, X >>u undef -> 0
-      return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
+      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
   }
 
   // See if we can fold away this shift.
@@ -7363,9 +7358,9 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
   //
   if (Op1->uge(TypeBits)) {
     if (I.getOpcode() != Instruction::AShr)
-      return ReplaceInstUsesWith(I, Context->getNullValue(Op0->getType()));
+      return ReplaceInstUsesWith(I, Constant::getNullValue(Op0->getType()));
     else {
-      I.setOperand(1, Context->getConstantInt(I.getType(), TypeBits-1));
+      I.setOperand(1, ConstantInt::get(I.getType(), TypeBits-1));
       return &I;
     }
   }
@@ -7375,7 +7370,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
     if (BO->getOpcode() == Instruction::Mul && isLeftShift)
       if (Constant *BOOp = dyn_cast<Constant>(BO->getOperand(1)))
         return BinaryOperator::CreateMul(BO->getOperand(0),
-                                        Context->getConstantExprShl(BOOp, Op1));
+                                        ConstantExpr::getShl(BOOp, Op1));
   
   // Try to fold constant and into select arguments.
   if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
@@ -7396,10 +7391,9 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
     if (TrOp && I.isLogicalShift() && TrOp->isShift() && 
         isa<ConstantInt>(TrOp->getOperand(1))) {
       // Okay, we'll do this xform.  Make the shift of shift.
-      Constant *ShAmt = Context->getConstantExprZExt(Op1, TrOp->getType());
-      Instruction *NSh = BinaryOperator::Create(I.getOpcode(), TrOp, ShAmt,
-                                                I.getName());
-      InsertNewInstBefore(NSh, I); // (shift2 (shift1 & 0x00FF), c2)
+      Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType());
+      // (shift2 (shift1 & 0x00FF), c2)
+      Value *NSh = Builder->CreateBinOp(I.getOpcode(), TrOp, ShAmt,I.getName());
 
       // For logical shifts, the truncation has the effect of making the high
       // part of the register be zeros.  Emulate this by inserting an AND to
@@ -7420,10 +7414,9 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
         MaskV = MaskV.lshr(Op1->getZExtValue());
       }
 
-      Instruction *And =
-        BinaryOperator::CreateAnd(NSh, Context->getConstantInt(MaskV), 
-                                  TI->getName());
-      InsertNewInstBefore(And, I); // shift1 & 0x00FF
+      // shift1 & 0x00FF
+      Value *And = Builder->CreateAnd(NSh, ConstantInt::get(*Context, MaskV),
+                                      TI->getName());
 
       // Return the value truncated to the interesting size.
       return new TruncInst(And, I.getType());
@@ -7444,17 +7437,15 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
           // These operators commute.
           // Turn (Y + (X >> C)) << C  ->  (X + (Y << C)) & (~0 << C)
           if (isLeftShift && Op0BO->getOperand(1)->hasOneUse() &&
-              match(Op0BO->getOperand(1), m_Shr(m_Value(V1), m_Specific(Op1)))){
-            Instruction *YS = BinaryOperator::CreateShl(
-                                            Op0BO->getOperand(0), Op1,
-                                            Op0BO->getName());
-            InsertNewInstBefore(YS, I); // (Y << C)
-            Instruction *X = 
-              BinaryOperator::Create(Op0BO->getOpcode(), YS, V1,
-                                     Op0BO->getOperand(1)->getName());
-            InsertNewInstBefore(X, I);  // (X + (Y << C))
+              match(Op0BO->getOperand(1), m_Shr(m_Value(V1),
+                    m_Specific(Op1)))) {
+            Value *YS =         // (Y << C)
+              Builder->CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName());
+            // (X + (Y << C))
+            Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), YS, V1,
+                                            Op0BO->getOperand(1)->getName());
             uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
-            return BinaryOperator::CreateAnd(X, Context->getConstantInt(
+            return BinaryOperator::CreateAnd(X, ConstantInt::get(*Context,
                        APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
           }
           
@@ -7465,16 +7456,12 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
                     m_And(m_Shr(m_Value(V1), m_Specific(Op1)),
                           m_ConstantInt(CC))) &&
               cast<BinaryOperator>(Op0BOOp1)->getOperand(0)->hasOneUse()) {
-            Instruction *YS = BinaryOperator::CreateShl(
-                                                     Op0BO->getOperand(0), Op1,
-                                                     Op0BO->getName());
-            InsertNewInstBefore(YS, I); // (Y << C)
-            Instruction *XM =
-              BinaryOperator::CreateAnd(V1,
-                                        Context->getConstantExprShl(CC, Op1),
-                                        V1->getName()+".mask");
-            InsertNewInstBefore(XM, I); // X & (CC << C)
-            
+            Value *YS =   // (Y << C)
+              Builder->CreateShl(Op0BO->getOperand(0), Op1,
+                                           Op0BO->getName());
+            // X & (CC << C)
+            Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
+                                           V1->getName()+".mask");
             return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM);
           }
         }
@@ -7483,17 +7470,15 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
         case Instruction::Sub: {
           // Turn ((X >> C) + Y) << C  ->  (X + (Y << C)) & (~0 << C)
           if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
-              match(Op0BO->getOperand(0), m_Shr(m_Value(V1), m_Specific(Op1)))){
-            Instruction *YS = BinaryOperator::CreateShl(
-                                                     Op0BO->getOperand(1), Op1,
-                                                     Op0BO->getName());
-            InsertNewInstBefore(YS, I); // (Y << C)
-            Instruction *X =
-              BinaryOperator::Create(Op0BO->getOpcode(), V1, YS,
-                                     Op0BO->getOperand(0)->getName());
-            InsertNewInstBefore(X, I);  // (X + (Y << C))
+              match(Op0BO->getOperand(0), m_Shr(m_Value(V1),
+                    m_Specific(Op1)))) {
+            Value *YS =  // (Y << C)
+              Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
+            // (X + (Y << C))
+            Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), V1, YS,
+                                            Op0BO->getOperand(0)->getName());
             uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
-            return BinaryOperator::CreateAnd(X, Context->getConstantInt(
+            return BinaryOperator::CreateAnd(X, ConstantInt::get(*Context,
                        APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
           }
           
@@ -7504,15 +7489,11 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
                           m_ConstantInt(CC))) && V2 == Op1 &&
               cast<BinaryOperator>(Op0BO->getOperand(0))
                   ->getOperand(0)->hasOneUse()) {
-            Instruction *YS = BinaryOperator::CreateShl(
-                                                     Op0BO->getOperand(1), Op1,
-                                                     Op0BO->getName());
-            InsertNewInstBefore(YS, I); // (Y << C)
-            Instruction *XM =
-              BinaryOperator::CreateAnd(V1, 
-                                        Context->getConstantExprShl(CC, Op1),
-                                        V1->getName()+".mask");
-            InsertNewInstBefore(XM, I); // X & (CC << C)
+            Value *YS = // (Y << C)
+              Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
+            // X & (CC << C)
+            Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
+                                           V1->getName()+".mask");
             
             return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS);
           }
@@ -7552,11 +7533,10 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
           isValid = Op0C->getValue()[TypeBits-1] == highBitSet;
         
         if (isValid) {
-          Constant *NewRHS = Context->getConstantExpr(I.getOpcode(), Op0C, Op1);
+          Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1);
           
-          Instruction *NewShift =
-            BinaryOperator::Create(I.getOpcode(), Op0BO->getOperand(0), Op1);
-          InsertNewInstBefore(NewShift, I);
+          Value *NewShift =
+            Builder->CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1);
           NewShift->takeName(Op0BO);
           
           return BinaryOperator::Create(Op0BO->getOpcode(), NewShift,
@@ -7589,31 +7569,33 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
       // saturates.
       if (AmtSum >= TypeBits) {
         if (I.getOpcode() != Instruction::AShr)
-          return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
+          return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
         AmtSum = TypeBits-1;  // Saturate to 31 for i32 ashr.
       }
       
       return BinaryOperator::Create(I.getOpcode(), X,
-                                    Context->getConstantInt(Ty, AmtSum));
-    } else if (ShiftOp->getOpcode() == Instruction::LShr &&
-               I.getOpcode() == Instruction::AShr) {
+                                    ConstantInt::get(Ty, AmtSum));
+    }
+    
+    if (ShiftOp->getOpcode() == Instruction::LShr &&
+        I.getOpcode() == Instruction::AShr) {
       if (AmtSum >= TypeBits)
-        return ReplaceInstUsesWith(I, Context->getNullValue(I.getType()));
+        return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
       
       // ((X >>u C1) >>s C2) -> (X >>u (C1+C2))  since C1 != 0.
-      return BinaryOperator::CreateLShr(X, Context->getConstantInt(Ty, AmtSum));
-    } else if (ShiftOp->getOpcode() == Instruction::AShr &&
-               I.getOpcode() == Instruction::LShr) {
+      return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, AmtSum));
+    }
+    
+    if (ShiftOp->getOpcode() == Instruction::AShr &&
+        I.getOpcode() == Instruction::LShr) {
       // ((X >>s C1) >>u C2) -> ((X >>s (C1+C2)) & mask) since C1 != 0.
       if (AmtSum >= TypeBits)
         AmtSum = TypeBits-1;
       
-      Instruction *Shift =
-        BinaryOperator::CreateAShr(X, Context->getConstantInt(Ty, AmtSum));
-      InsertNewInstBefore(Shift, I);
+      Value *Shift = Builder->CreateAShr(X, ConstantInt::get(Ty, AmtSum));
 
       APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
-      return BinaryOperator::CreateAnd(Shift, Context->getConstantInt(Mask));
+      return BinaryOperator::CreateAnd(Shift, ConstantInt::get(*Context, Mask));
     }
     
     // Okay, if we get here, one shift must be left, and the other shift must be
@@ -7622,12 +7604,12 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
       // If we have ((X >>? C) << C), turn this into X & (-1 << C).
       if (I.getOpcode() == Instruction::Shl) {
         APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt1));
-        return BinaryOperator::CreateAnd(X, Context->getConstantInt(Mask));
+        return BinaryOperator::CreateAnd(X, ConstantInt::get(*Context, Mask));
       }
       // If we have ((X << C) >>u C), turn this into X & (-1 >>u C).
       if (I.getOpcode() == Instruction::LShr) {
         APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1));
-        return BinaryOperator::CreateAnd(X, Context->getConstantInt(Mask));
+        return BinaryOperator::CreateAnd(X, ConstantInt::get(*Context, Mask));
       }
       // We can simplify ((X << C) >>s C) into a trunc + sext.
       // NOTE: we could do this for any C, but that would make 'unusual' integer
@@ -7641,15 +7623,12 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
       case 32 :
       case 64 :
       case 128:
-        SExtType = Context->getIntegerType(Ty->getBitWidth() - ShiftAmt1);
+        SExtType = IntegerType::get(*Context, Ty->getBitWidth() - ShiftAmt1);
         break;
       default: break;
       }
-      if (SExtType) {
-        Instruction *NewTrunc = new TruncInst(X, SExtType, "sext");
-        InsertNewInstBefore(NewTrunc, I);
-        return new SExtInst(NewTrunc, Ty);
-      }
+      if (SExtType)
+        return new SExtInst(Builder->CreateTrunc(X, SExtType, "sext"), Ty);
       // Otherwise, we can't handle it yet.
     } else if (ShiftAmt1 < ShiftAmt2) {
       uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1;
@@ -7658,23 +7637,21 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
       if (I.getOpcode() == Instruction::Shl) {
         assert(ShiftOp->getOpcode() == Instruction::LShr ||
                ShiftOp->getOpcode() == Instruction::AShr);
-        Instruction *Shift =
-          BinaryOperator::CreateShl(X, Context->getConstantInt(Ty, ShiftDiff));
-        InsertNewInstBefore(Shift, I);
+        Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
         
         APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
-        return BinaryOperator::CreateAnd(Shift, Context->getConstantInt(Mask));
+        return BinaryOperator::CreateAnd(Shift,
+                                         ConstantInt::get(*Context, Mask));
       }
       
       // (X << C1) >>u C2  --> X >>u (C2-C1) & (-1 >> C2)
       if (I.getOpcode() == Instruction::LShr) {
         assert(ShiftOp->getOpcode() == Instruction::Shl);
-        Instruction *Shift =
-          BinaryOperator::CreateLShr(X, Context->getConstantInt(Ty, ShiftDiff));
-        InsertNewInstBefore(Shift, I);
+        Value *Shift = Builder->CreateLShr(X, ConstantInt::get(Ty, ShiftDiff));
         
         APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
-        return BinaryOperator::CreateAnd(Shift, Context->getConstantInt(Mask));
+        return BinaryOperator::CreateAnd(Shift,
+                                         ConstantInt::get(*Context, Mask));
       }
       
       // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in.
@@ -7686,24 +7663,22 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
       if (I.getOpcode() == Instruction::Shl) {
         assert(ShiftOp->getOpcode() == Instruction::LShr ||
                ShiftOp->getOpcode() == Instruction::AShr);
-        Instruction *Shift =
-          BinaryOperator::Create(ShiftOp->getOpcode(), X,
-                                 Context->getConstantInt(Ty, ShiftDiff));
-        InsertNewInstBefore(Shift, I);
+        Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(), X,
+                                            ConstantInt::get(Ty, ShiftDiff));
         
         APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
-        return BinaryOperator::CreateAnd(Shift, Context->getConstantInt(Mask));
+        return BinaryOperator::CreateAnd(Shift,
+                                         ConstantInt::get(*Context, Mask));
       }
       
       // (X << C1) >>u C2  --> X << (C1-C2) & (-1 >> C2)
       if (I.getOpcode() == Instruction::LShr) {
         assert(ShiftOp->getOpcode() == Instruction::Shl);
-        Instruction *Shift =
-          BinaryOperator::CreateShl(X, Context->getConstantInt(Ty, ShiftDiff));
-        InsertNewInstBefore(Shift, I);
+        Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
         
         APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
-        return BinaryOperator::CreateAnd(Shift, Context->getConstantInt(Mask));
+        return BinaryOperator::CreateAnd(Shift,
+                                         ConstantInt::get(*Context, Mask));
       }
       
       // We can't handle (X << C1) >>a C2, it shifts arbitrary bits in.
@@ -7718,12 +7693,13 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
 /// X*Scale+Offset.
 ///
 static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
-                                        int &Offset, LLVMContext* Context) {
-  assert(Val->getType() == Type::Int32Ty && "Unexpected allocation size type!");
+                                        int &Offset, LLVMContext *Context) {
+  assert(Val->getType() == Type::getInt32Ty(*Context) && 
+         "Unexpected allocation size type!");
   if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
     Offset = CI->getZExtValue();
     Scale  = 0;
-    return Context->getConstantInt(Type::Int32Ty, 0);
+    return ConstantInt::get(Type::getInt32Ty(*Context), 0);
   } else if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) {
     if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
       if (I->getOpcode() == Instruction::Shl) {
@@ -7763,6 +7739,9 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
                                                    AllocationInst &AI) {
   const PointerType *PTy = cast<PointerType>(CI.getType());
   
+  BuilderTy AllocaBuilder(*Builder);
+  AllocaBuilder.SetInsertPoint(AI.getParent(), &AI);
+  
   // Remove any uses of AI that are dead.
   assert(!CI.use_empty() && "Dead instructions should be removed earlier!");
   
@@ -7773,11 +7752,14 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
         ++UI; // If this instruction uses AI more than once, don't break UI.
       
       ++NumDeadInst;
-      DOUT << "IC: DCE: " << *User;
+      DEBUG(errs() << "IC: DCE: " << *User << '\n');
       EraseInstFromFunction(*User);
     }
   }
-  
+
+  // This requires TargetData to get the alloca alignment and size information.
+  if (!TD) return 0;
+
   // Get the type really allocated and the type casted to.
   const Type *AllocElTy = AI.getAllocatedType();
   const Type *CastElTy = PTy->getElementType();
@@ -7816,30 +7798,22 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
   if (Scale == 1) {
     Amt = NumElements;
   } else {
-    // If the allocation size is constant, form a constant mul expression
-    Amt = Context->getConstantInt(Type::Int32Ty, Scale);
-    if (isa<ConstantInt>(NumElements))
-      Amt = Context->getConstantExprMul(cast<ConstantInt>(NumElements),
-                                 cast<ConstantInt>(Amt));
-    // otherwise multiply the amount and the number of elements
-    else {
-      Instruction *Tmp = BinaryOperator::CreateMul(Amt, NumElements, "tmp");
-      Amt = InsertNewInstBefore(Tmp, AI);
-    }
+    Amt = ConstantInt::get(Type::getInt32Ty(*Context), Scale);
+    // Insert before the alloca, not before the cast.
+    Amt = AllocaBuilder.CreateMul(Amt, NumElements, "tmp");
   }
   
   if (int Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
-    Value *Off = Context->getConstantInt(Type::Int32Ty, Offset, true);
-    Instruction *Tmp = BinaryOperator::CreateAdd(Amt, Off, "tmp");
-    Amt = InsertNewInstBefore(Tmp, AI);
+    Value *Off = ConstantInt::get(Type::getInt32Ty(*Context), Offset, true);
+    Amt = AllocaBuilder.CreateAdd(Amt, Off, "tmp");
   }
   
   AllocationInst *New;
   if (isa<MallocInst>(AI))
-    New = new MallocInst(CastElTy, Amt, AI.getAlignment());
+    New = AllocaBuilder.CreateMalloc(CastElTy, Amt);
   else
-    New = new AllocaInst(CastElTy, Amt, AI.getAlignment());
-  InsertNewInstBefore(New, AI);
+    New = AllocaBuilder.CreateAlloca(CastElTy, Amt);
+  New->setAlignment(AI.getAlignment());
   New->takeName(&AI);
   
   // If the allocation has one real use plus a dbg.declare, just remove the
@@ -7851,11 +7825,9 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
   // things that used it to use the new cast.  This will also hack on CI, but it
   // will die soon.
   else if (!AI.hasOneUse()) {
-    AddUsesToWorkList(AI);
     // New is the allocation instruction, pointer typed. AI is the original
     // allocation instruction, also pointer typed. Thus, cast to use is BitCast.
-    CastInst *NewCast = new BitCastInst(New, AI.getType(), "tmpcast");
-    InsertNewInstBefore(NewCast, AI);
+    Value *NewCast = AllocaBuilder.CreateBitCast(New, AI.getType(), "tmpcast");
     AI.replaceAllUsesWith(NewCast);
   }
   return ReplaceInstUsesWith(CI, New);
@@ -7923,6 +7895,23 @@ bool InstCombiner::CanEvaluateInDifferentType(Value *V, const Type *Ty,
            CanEvaluateInDifferentType(I->getOperand(1), Ty, CastOpc,
                                       NumCastsRemoved);
 
+  case Instruction::UDiv:
+  case Instruction::URem: {
+    // UDiv and URem can be truncated if all the truncated bits are zero.
+    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
+    uint32_t BitWidth = Ty->getScalarSizeInBits();
+    if (BitWidth < OrigBitWidth) {
+      APInt Mask = APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth);
+      if (MaskedValueIsZero(I->getOperand(0), Mask) &&
+          MaskedValueIsZero(I->getOperand(1), Mask)) {
+        return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc,
+                                          NumCastsRemoved) &&
+               CanEvaluateInDifferentType(I->getOperand(1), Ty, CastOpc,
+                                          NumCastsRemoved);
+      }
+    }
+    break;
+  }
   case Instruction::Shl:
     // If we are truncating the result of this SHL, and if it's a shift of a
     // constant amount, we can always perform a SHL in a smaller type.
@@ -7993,7 +7982,7 @@ bool InstCombiner::CanEvaluateInDifferentType(Value *V, const Type *Ty,
 Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty, 
                                              bool isSigned) {
   if (Constant *C = dyn_cast<Constant>(V))
-    return Context->getConstantExprIntegerCast(C, Ty,
+    return ConstantExpr::getIntegerCast(C, Ty,
                                                isSigned /*Sext or ZExt*/);
 
   // Otherwise, it must be an instruction.
@@ -8009,7 +7998,9 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty,
   case Instruction::Xor:
   case Instruction::AShr:
   case Instruction::LShr:
-  case Instruction::Shl: {
+  case Instruction::Shl:
+  case Instruction::UDiv:
+  case Instruction::URem: {
     Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned);
     Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
     Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
@@ -8046,7 +8037,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty,
   }
   default: 
     // TODO: Can handle more cases here.
-    assert(0 && "Unreachable!");
+    llvm_unreachable("Unreachable!");
     break;
   }
   
@@ -8089,13 +8080,14 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
 static const Type *FindElementAtOffset(const Type *Ty, int64_t Offset, 
                                        SmallVectorImpl<Value*> &NewIndices,
                                        const TargetData *TD,
-                                       LLVMContext* Context) {
+                                       LLVMContext *Context) {
+  if (!TD) return 0;
   if (!Ty->isSized()) return 0;
   
   // Start with the index over the outer type.  Note that the type size
   // might be zero (even if the offset isn't zero) if the indexed type
   // is something like [0 x {int, int}]
-  const Type *IntPtrTy = TD->getIntPtrType();
+  const Type *IntPtrTy = TD->getIntPtrType(*Context);
   int64_t FirstIdx = 0;
   if (int64_t TySize = TD->getTypeAllocSize(Ty)) {
     FirstIdx = Offset/TySize;
@@ -8110,7 +8102,7 @@ static const Type *FindElementAtOffset(const Type *Ty, int64_t Offset,
     assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset");
   }
   
-  NewIndices.push_back(Context->getConstantInt(IntPtrTy, FirstIdx));
+  NewIndices.push_back(ConstantInt::get(IntPtrTy, FirstIdx));
     
   // Index into the types.  If we fail, set OrigBase to null.
   while (Offset) {
@@ -8124,14 +8116,14 @@ static const Type *FindElementAtOffset(const Type *Ty, int64_t Offset,
              "Offset must stay within the indexed type");
       
       unsigned Elt = SL->getElementContainingOffset(Offset);
-      NewIndices.push_back(Context->getConstantInt(Type::Int32Ty, Elt));
+      NewIndices.push_back(ConstantInt::get(Type::getInt32Ty(*Context), Elt));
       
       Offset -= SL->getElementOffset(Elt);
       Ty = STy->getElementType(Elt);
     } else if (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
       uint64_t EltSize = TD->getTypeAllocSize(AT->getElementType());
       assert(EltSize && "Cannot index into a zero-sized array");
-      NewIndices.push_back(Context->getConstantInt(IntPtrTy,Offset/EltSize));
+      NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize));
       Offset %= EltSize;
       Ty = AT->getElementType();
     } else {
@@ -8154,7 +8146,7 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
       // Changing the cast operand is usually not a good idea but it is safe
       // here because the pointer operand is being replaced with another 
       // pointer operand so the opcode doesn't need to change.
-      AddToWorkList(GEP);
+      Worklist.Add(GEP);
       CI.setOperand(0, GEP->getOperand(0));
       return &CI;
     }
@@ -8163,7 +8155,7 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
     // GEP computes a constant offset, see if we can convert these three
     // instructions into fewer.  This typically happens with unions and other
     // non-type-safe code.
-    if (GEP->hasOneUse() && isa<BitCastInst>(GEP->getOperand(0))) {
+    if (TD && GEP->hasOneUse() && isa<BitCastInst>(GEP->getOperand(0))) {
       if (GEP->hasAllConstantIndices()) {
         // We are guaranteed to get a constant from EmitGEPOffset.
         ConstantInt *OffsetV =
@@ -8179,10 +8171,10 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
           // If we were able to index down into an element, create the GEP
           // and bitcast the result.  This eliminates one bitcast, potentially
           // two.
-          Instruction *NGEP = GetElementPtrInst::Create(OrigBase, 
-                                                        NewIndices.begin(),
-                                                        NewIndices.end(), "");
-          InsertNewInstBefore(NGEP, CI);
+          Value *NGEP = cast<GEPOperator>(GEP)->isInBounds() ?
+            Builder->CreateInBoundsGEP(OrigBase,
+                                       NewIndices.begin(), NewIndices.end()) :
+            Builder->CreateGEP(OrigBase, NewIndices.begin(), NewIndices.end());
           NGEP->takeName(GEP);
           
           if (isa<BitCastInst>(CI))
@@ -8214,10 +8206,8 @@ static bool isSafeIntegerType(const Type *Ty) {
   }
 }
 
-/// Only the TRUNC, ZEXT, SEXT, and BITCAST can both operand and result as
-/// integer types. This function implements the common transforms for all those
-/// cases.
-/// @brief Implement the transforms common to CastInst with integer operands
+/// commonIntCastTransforms - This function implements the common transforms
+/// for trunc, zext, and sext.
 Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
   if (Instruction *Result = commonCastTransforms(CI))
     return Result;
@@ -8241,11 +8231,10 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
 
   // Attempt to propagate the cast into the instruction for int->int casts.
   int NumCastsRemoved = 0;
-  if (!isa<BitCastInst>(CI) &&
-      // Only do this if the dest type is a simple type, don't convert the
-      // expression tree to something weird like i93 unless the source is also
-      // strange.
-      (isSafeIntegerType(DestTy->getScalarType()) ||
+  // Only do this if the dest type is a simple type, don't convert the
+  // expression tree to something weird like i93 unless the source is also
+  // strange.
+  if ((isSafeIntegerType(DestTy->getScalarType()) ||
        !isSafeIntegerType(SrcI->getType()->getScalarType())) &&
       CanEvaluateInDifferentType(SrcI, DestTy,
                                  CI.getOpcode(), NumCastsRemoved)) {
@@ -8261,7 +8250,7 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
     default:
       // All the others use floating point so we shouldn't actually 
       // get here because of the check above.
-      assert(0 && "Unknown cast type");
+      llvm_unreachable("Unknown cast type");
     case Instruction::Trunc:
       DoXForm = true;
       break;
@@ -8307,8 +8296,8 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
     }
     
     if (DoXForm) {
-      DOUT << "ICE: EvaluateInDifferentType converting expression type to avoid"
-           << " cast: " << CI;
+      DEBUG(errs() << "ICE: EvaluateInDifferentType converting expression type"
+            " to avoid cast: " << CI);
       Value *Res = EvaluateInDifferentType(SrcI, DestTy, 
                                            CI.getOpcode() == Instruction::SExt);
       if (JustReplace)
@@ -8317,9 +8306,8 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
 
       assert(Res->getType() == DestTy);
       switch (CI.getOpcode()) {
-      default: assert(0 && "Unknown cast type!");
+      default: llvm_unreachable("Unknown cast type!");
       case Instruction::Trunc:
-      case Instruction::BitCast:
         // Just replace this cast with the result.
         return ReplaceInstUsesWith(CI, Res);
       case Instruction::ZExt: {
@@ -8332,8 +8320,8 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
           return ReplaceInstUsesWith(CI, Res);
 
         // We need to emit an AND to clear the high bits.
-        Constant *C = Context->getConstantInt(APInt::getLowBitsSet(DestBitSize,
-                                                            SrcBitSize));
+        Constant *C = ConstantInt::get(*Context, 
+                                 APInt::getLowBitsSet(DestBitSize, SrcBitSize));
         return BinaryOperator::CreateAnd(Res, C);
       }
       case Instruction::SExt: {
@@ -8344,9 +8332,7 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
           return ReplaceInstUsesWith(CI, Res);
 
         // We need to emit a cast to truncate, then a cast to sext.
-        return CastInst::Create(Instruction::SExt,
-            InsertCastBefore(Instruction::Trunc, Res, Src->getType(), 
-                             CI), DestTy);
+        return new SExtInst(Builder->CreateTrunc(Res, Src->getType()), DestTy);
       }
       }
     }
@@ -8362,16 +8348,12 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
   case Instruction::Or:
   case Instruction::Xor:
     // If we are discarding information, rewrite.
-    if (DestBitSize <= SrcBitSize && DestBitSize != 1) {
-      // Don't insert two casts if they cannot be eliminated.  We allow 
-      // two casts to be inserted if the sizes are the same.  This could 
-      // only be converting signedness, which is a noop.
-      if (DestBitSize == SrcBitSize || 
-          !ValueRequiresCast(CI.getOpcode(), Op1, DestTy,TD) ||
+    if (DestBitSize < SrcBitSize && DestBitSize != 1) {
+      // Don't insert two casts unless at least one can be eliminated.
+      if (!ValueRequiresCast(CI.getOpcode(), Op1, DestTy, TD) ||
           !ValueRequiresCast(CI.getOpcode(), Op0, DestTy, TD)) {
-        Instruction::CastOps opcode = CI.getOpcode();
-        Value *Op0c = InsertCastBefore(opcode, Op0, DestTy, *SrcI);
-        Value *Op1c = InsertCastBefore(opcode, Op1, DestTy, *SrcI);
+        Value *Op0c = Builder->CreateTrunc(Op0, DestTy, Op0->getName());
+        Value *Op1c = Builder->CreateTrunc(Op1, DestTy, Op1->getName());
         return BinaryOperator::Create(
             cast<BinaryOperator>(SrcI)->getOpcode(), Op0c, Op1c);
       }
@@ -8380,62 +8362,25 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
     // cast (xor bool X, true) to int  --> xor (cast bool X to int), 1
     if (isa<ZExtInst>(CI) && SrcBitSize == 1 && 
         SrcI->getOpcode() == Instruction::Xor &&
-        Op1 == Context->getConstantIntTrue() &&
+        Op1 == ConstantInt::getTrue(*Context) &&
         (!Op0->hasOneUse() || !isa<CmpInst>(Op0))) {
-      Value *New = InsertCastBefore(Instruction::ZExt, Op0, DestTy, CI);
+      Value *New = Builder->CreateZExt(Op0, DestTy, Op0->getName());
       return BinaryOperator::CreateXor(New,
-                                      Context->getConstantInt(CI.getType(), 1));
-    }
-    break;
-  case Instruction::SDiv:
-  case Instruction::UDiv:
-  case Instruction::SRem:
-  case Instruction::URem:
-    // If we are just changing the sign, rewrite.
-    if (DestBitSize == SrcBitSize) {
-      // Don't insert two casts if they cannot be eliminated.  We allow 
-      // two casts to be inserted if the sizes are the same.  This could 
-      // only be converting signedness, which is a noop.
-      if (!ValueRequiresCast(CI.getOpcode(), Op1, DestTy, TD) || 
-          !ValueRequiresCast(CI.getOpcode(), Op0, DestTy, TD)) {
-        Value *Op0c = InsertCastBefore(Instruction::BitCast, 
-                                       Op0, DestTy, *SrcI);
-        Value *Op1c = InsertCastBefore(Instruction::BitCast, 
-                                       Op1, DestTy, *SrcI);
-        return BinaryOperator::Create(
-          cast<BinaryOperator>(SrcI)->getOpcode(), Op0c, Op1c);
-      }
+                                      ConstantInt::get(CI.getType(), 1));
     }
     break;
 
-  case Instruction::Shl:
-    // Allow changing the sign of the source operand.  Do not allow 
-    // changing the size of the shift, UNLESS the shift amount is a 
-    // constant.  We must not change variable sized shifts to a smaller 
-    // size, because it is undefined to shift more bits out than exist 
-    // in the value.
-    if (DestBitSize == SrcBitSize ||
-        (DestBitSize < SrcBitSize && isa<Constant>(Op1))) {
-      Instruction::CastOps opcode = (DestBitSize == SrcBitSize ?
-          Instruction::BitCast : Instruction::Trunc);
-      Value *Op0c = InsertCastBefore(opcode, Op0, DestTy, *SrcI);
-      Value *Op1c = InsertCastBefore(opcode, Op1, DestTy, *SrcI);
+  case Instruction::Shl: {
+    // Canonicalize trunc inside shl, if we can.
+    ConstantInt *CI = dyn_cast<ConstantInt>(Op1);
+    if (CI && DestBitSize < SrcBitSize &&
+        CI->getLimitedValue(DestBitSize) < DestBitSize) {
+      Value *Op0c = Builder->CreateTrunc(Op0, DestTy, Op0->getName());
+      Value *Op1c = Builder->CreateTrunc(Op1, DestTy, Op1->getName());
       return BinaryOperator::CreateShl(Op0c, Op1c);
     }
     break;
-  case Instruction::AShr:
-    // If this is a signed shr, and if all bits shifted in are about to be
-    // truncated off, turn it into an unsigned shr to allow greater
-    // simplifications.
-    if (DestBitSize < SrcBitSize &&
-        isa<ConstantInt>(Op1)) {
-      uint32_t ShiftAmt = cast<ConstantInt>(Op1)->getLimitedValue(SrcBitSize);
-      if (SrcBitSize > ShiftAmt && SrcBitSize-ShiftAmt >= DestBitSize) {
-        // Insert the new logical shift right.
-        return BinaryOperator::CreateLShr(Op0, Op1);
-      }
-    }
-    break;
+  }
   }
   return 0;
 }
@@ -8450,11 +8395,10 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
   uint32_t SrcBitWidth = Src->getType()->getScalarSizeInBits();
 
   // Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0)
-  if (DestBitWidth == 1 &&
-      isa<VectorType>(Ty) == isa<VectorType>(Src->getType())) {
-    Constant *One = Context->getConstantInt(Src->getType(), 1);
-    Src = InsertNewInstBefore(BinaryOperator::CreateAnd(Src, One, "tmp"), CI);
-    Value *Zero = Context->getNullValue(Src->getType());
+  if (DestBitWidth == 1) {
+    Constant *One = ConstantInt::get(Src->getType(), 1);
+    Src = Builder->CreateAnd(Src, One, "tmp");
+    Value *Zero = Constant::getNullValue(Src->getType());
     return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);
   }
 
@@ -8469,12 +8413,12 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
     APInt Mask(APInt::getLowBitsSet(SrcBitWidth, ShAmt).shl(DestBitWidth));
     if (MaskedValueIsZero(ShiftOp, Mask)) {
       if (ShAmt >= DestBitWidth)        // All zeros.
-        return ReplaceInstUsesWith(CI, Context->getNullValue(Ty));
+        return ReplaceInstUsesWith(CI, Constant::getNullValue(Ty));
       
       // Okay, we can shrink this.  Truncate the input, then return a new
       // shift.
-      Value *V1 = InsertCastBefore(Instruction::Trunc, ShiftOp, Ty, CI);
-      Value *V2 = Context->getConstantExprTrunc(ShAmtV, Ty);
+      Value *V1 = Builder->CreateTrunc(ShiftOp, Ty, ShiftOp->getName());
+      Value *V2 = ConstantExpr::getTrunc(ShAmtV, Ty);
       return BinaryOperator::CreateLShr(V1, V2);
     }
   }
@@ -8499,20 +8443,15 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
       if (!DoXform) return ICI;
 
       Value *In = ICI->getOperand(0);
-      Value *Sh = Context->getConstantInt(In->getType(),
+      Value *Sh = ConstantInt::get(In->getType(),
                                    In->getType()->getScalarSizeInBits()-1);
-      In = InsertNewInstBefore(BinaryOperator::CreateLShr(In, Sh,
-                                                        In->getName()+".lobit"),
-                               CI);
+      In = Builder->CreateLShr(In, Sh, In->getName()+".lobit");
       if (In->getType() != CI.getType())
-        In = CastInst::CreateIntegerCast(In, CI.getType(),
-                                         false/*ZExt*/, "tmp", &CI);
+        In = Builder->CreateIntCast(In, CI.getType(), false/*ZExt*/, "tmp");
 
       if (ICI->getPredicate() == ICmpInst::ICMP_SGT) {
-        Constant *One = Context->getConstantInt(In->getType(), 1);
-        In = InsertNewInstBefore(BinaryOperator::CreateXor(In, One,
-                                                         In->getName()+".not"),
-                                 CI);
+        Constant *One = ConstantInt::get(In->getType(), 1);
+        In = Builder->CreateXor(In, One, In->getName()+".not");
       }
 
       return ReplaceInstUsesWith(CI, In);
@@ -8545,8 +8484,8 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
         if (Op1CV != 0 && (Op1CV != KnownZeroMask)) {
           // (X&4) == 2 --> false
           // (X&4) != 2 --> true
-          Constant *Res = Context->getConstantInt(Type::Int1Ty, isNE);
-          Res = Context->getConstantExprZExt(Res, CI.getType());
+          Constant *Res = ConstantInt::get(Type::getInt1Ty(*Context), isNE);
+          Res = ConstantExpr::getZExt(Res, CI.getType());
           return ReplaceInstUsesWith(CI, Res);
         }
           
@@ -8555,15 +8494,13 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
         if (ShiftAmt) {
           // Perform a logical shr by shiftamt.
           // Insert the shift to put the result in the low bit.
-          In = InsertNewInstBefore(BinaryOperator::CreateLShr(In,
-                              Context->getConstantInt(In->getType(), ShiftAmt),
-                                                   In->getName()+".lobit"), CI);
+          In = Builder->CreateLShr(In, ConstantInt::get(In->getType(),ShiftAmt),
+                                   In->getName()+".lobit");
         }
           
         if ((Op1CV != 0) == isNE) { // Toggle the low bit.
-          Constant *One = Context->getConstantInt(In->getType(), 1);
-          In = BinaryOperator::CreateXor(In, One, "tmp");
-          InsertNewInstBefore(cast<Instruction>(In), CI);
+          Constant *One = ConstantInt::get(In->getType(), 1);
+          In = Builder->CreateXor(In, One, "tmp");
         }
           
         if (CI.getType() == In->getType())
@@ -8600,21 +8537,21 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
     // SrcSize  > DstSize: trunc(a) & mask
     if (SrcSize < DstSize) {
       APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
-      Constant *AndConst = Context->getConstantInt(A->getType(), AndValue);
-      Instruction *And =
-        BinaryOperator::CreateAnd(A, AndConst, CSrc->getName()+".mask");
-      InsertNewInstBefore(And, CI);
+      Constant *AndConst = ConstantInt::get(A->getType(), AndValue);
+      Value *And = Builder->CreateAnd(A, AndConst, CSrc->getName()+".mask");
       return new ZExtInst(And, CI.getType());
-    } else if (SrcSize == DstSize) {
+    }
+    
+    if (SrcSize == DstSize) {
       APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
-      return BinaryOperator::CreateAnd(A, Context->getConstantInt(A->getType(),
+      return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(),
                                                            AndValue));
-    } else if (SrcSize > DstSize) {
-      Instruction *Trunc = new TruncInst(A, CI.getType(), "tmp");
-      InsertNewInstBefore(Trunc, CI);
+    }
+    if (SrcSize > DstSize) {
+      Value *Trunc = Builder->CreateTrunc(A, CI.getType(), "tmp");
       APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
       return BinaryOperator::CreateAnd(Trunc, 
-                                       Context->getConstantInt(Trunc->getType(),
+                                       ConstantInt::get(Trunc->getType(),
                                                                AndValue));
     }
   }
@@ -8631,8 +8568,8 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
     if (LHS && RHS && LHS->hasOneUse() && RHS->hasOneUse() &&
         (transformZExtICmp(LHS, CI, false) ||
          transformZExtICmp(RHS, CI, false))) {
-      Value *LCast = InsertCastBefore(Instruction::ZExt, LHS, CI.getType(), CI);
-      Value *RCast = InsertCastBefore(Instruction::ZExt, RHS, CI.getType(), CI);
+      Value *LCast = Builder->CreateZExt(LHS, CI.getType(), LHS->getName());
+      Value *RCast = Builder->CreateZExt(RHS, CI.getType(), RHS->getName());
       return BinaryOperator::Create(Instruction::Or, LCast, RCast);
     }
   }
@@ -8645,7 +8582,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
         if (TI0->getType() == CI.getType())
           return
             BinaryOperator::CreateAnd(TI0,
-                                Context->getConstantExprZExt(C, CI.getType()));
+                                ConstantExpr::getZExt(C, CI.getType()));
       }
 
   // zext((trunc(t) & C) ^ C) -> ((t & zext(C)) ^ zext(C)).
@@ -8657,9 +8594,8 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
           if (TruncInst *TI = dyn_cast<TruncInst>(And->getOperand(0))) {
             Value *TI0 = TI->getOperand(0);
             if (TI0->getType() == CI.getType()) {
-              Constant *ZC = Context->getConstantExprZExt(C, CI.getType());
-              Instruction *NewAnd = BinaryOperator::CreateAnd(TI0, ZC, "tmp");
-              InsertNewInstBefore(NewAnd, *And);
+              Constant *ZC = ConstantExpr::getZExt(C, CI.getType());
+              Value *NewAnd = Builder->CreateAnd(TI0, ZC, "tmp");
               return BinaryOperator::CreateXor(NewAnd, ZC);
             }
           }
@@ -8674,14 +8610,14 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
   Value *Src = CI.getOperand(0);
   
   // Canonicalize sign-extend from i1 to a select.
-  if (Src->getType() == Type::Int1Ty)
+  if (Src->getType() == Type::getInt1Ty(*Context))
     return SelectInst::Create(Src,
-                              Context->getConstantIntAllOnesValue(CI.getType()),
-                              Context->getNullValue(CI.getType()));
+                              Constant::getAllOnesValue(CI.getType()),
+                              Constant::getNullValue(CI.getType()));
 
   // See if the value being truncated is already sign extended.  If so, just
   // eliminate the trunc/sext pair.
-  if (getOpcode(Src) == Instruction::Trunc) {
+  if (Operator::getOpcode(Src) == Instruction::Trunc) {
     Value *Op = cast<User>(Src)->getOperand(0);
     unsigned OpBits   = Op->getType()->getScalarSizeInBits();
     unsigned MidBits  = Src->getType()->getScalarSizeInBits();
@@ -8729,9 +8665,8 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
       unsigned MidSize = Src->getType()->getScalarSizeInBits();
       unsigned SrcDstSize = CI.getType()->getScalarSizeInBits();
       unsigned ShAmt = CA->getZExtValue()+SrcDstSize-MidSize;
-      Constant *ShAmtV = Context->getConstantInt(CI.getType(), ShAmt);
-      I = InsertNewInstBefore(BinaryOperator::CreateShl(I, ShAmtV,
-                                                        CI.getName()), CI);
+      Constant *ShAmtV = ConstantInt::get(CI.getType(), ShAmt);
+      I = Builder->CreateShl(I, ShAmtV, CI.getName());
       return BinaryOperator::CreateAShr(I, ShAmtV);
     }
   }
@@ -8742,18 +8677,18 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
 /// FitsInFPType - Return a Constant* for the specified FP constant if it fits
 /// in the specified FP type without changing its value.
 static Constant *FitsInFPType(ConstantFP *CFP, const fltSemantics &Sem,
-                              LLVMContext* Context) {
+                              LLVMContext *Context) {
   bool losesInfo;
   APFloat F = CFP->getValueAPF();
   (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo);
   if (!losesInfo)
-    return Context->getConstantFP(F);
+    return ConstantFP::get(*Context, F);
   return 0;
 }
 
 /// LookThroughFPExtensions - If this is an fp extension instruction, look
 /// through it until we get the source value.
-static Value *LookThroughFPExtensions(Value *V, LLVMContext* Context) {
+static Value *LookThroughFPExtensions(Value *V, LLVMContext *Context) {
   if (Instruction *I = dyn_cast<Instruction>(V))
     if (I->getOpcode() == Instruction::FPExt)
       return LookThroughFPExtensions(I->getOperand(0), Context);
@@ -8762,12 +8697,12 @@ static Value *LookThroughFPExtensions(Value *V, LLVMContext* Context) {
   // that can accurately represent it.  This allows us to turn
   // (float)((double)X+2.0) into x+2.0f.
   if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
-    if (CFP->getType() == Type::PPC_FP128Ty)
+    if (CFP->getType() == Type::getPPC_FP128Ty(*Context))
       return V;  // No constant folding of this.
     // See if the value can be truncated to float and then reextended.
     if (Value *V = FitsInFPType(CFP, APFloat::IEEEsingle, Context))
       return V;
-    if (CFP->getType() == Type::DoubleTy)
+    if (CFP->getType() == Type::getDoubleTy(*Context))
       return V;  // Won't shrink.
     if (Value *V = FitsInFPType(CFP, APFloat::IEEEdouble, Context))
       return V;
@@ -8804,10 +8739,8 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
         // the cast, do this xform.
         if (LHSTrunc->getType()->getScalarSizeInBits() <= DstSize &&
             RHSTrunc->getType()->getScalarSizeInBits() <= DstSize) {
-          LHSTrunc = InsertCastBefore(Instruction::FPExt, LHSTrunc,
-                                      CI.getType(), CI);
-          RHSTrunc = InsertCastBefore(Instruction::FPExt, RHSTrunc,
-                                      CI.getType(), CI);
+          LHSTrunc = Builder->CreateFPExt(LHSTrunc, CI.getType());
+          RHSTrunc = Builder->CreateFPExt(RHSTrunc, CI.getType());
           return BinaryOperator::Create(OpI->getOpcode(), LHSTrunc, RHSTrunc);
         }
       }
@@ -8875,10 +8808,11 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
   // trunc to be exposed to other transforms.  Don't do this for extending
   // ptrtoint's, because we don't know if the target sign or zero extends its
   // pointers.
-  if (CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) {
-    Value *P = InsertNewInstBefore(new PtrToIntInst(CI.getOperand(0),
-                                                    TD->getIntPtrType(),
-                                                    "tmp"), CI);
+  if (TD &&
+      CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) {
+    Value *P = Builder->CreatePtrToInt(CI.getOperand(0),
+                                       TD->getIntPtrType(CI.getContext()),
+                                       "tmp");
     return new TruncInst(P, CI.getType());
   }
   
@@ -8891,65 +8825,16 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
   // allows the trunc to be exposed to other transforms.  Don't do this for
   // extending inttoptr's, because we don't know if the target sign or zero
   // extends to pointers.
-  if (CI.getOperand(0)->getType()->getScalarSizeInBits() >
+  if (TD && CI.getOperand(0)->getType()->getScalarSizeInBits() >
       TD->getPointerSizeInBits()) {
-    Value *P = InsertNewInstBefore(new TruncInst(CI.getOperand(0),
-                                                 TD->getIntPtrType(),
-                                                 "tmp"), CI);
+    Value *P = Builder->CreateTrunc(CI.getOperand(0),
+                                    TD->getIntPtrType(CI.getContext()), "tmp");
     return new IntToPtrInst(P, CI.getType());
   }
   
   if (Instruction *I = commonCastTransforms(CI))
     return I;
-  
-  const Type *DestPointee = cast<PointerType>(CI.getType())->getElementType();
-  if (!DestPointee->isSized()) return 0;
-
-  // If this is inttoptr(add (ptrtoint x), cst), try to turn this into a GEP.
-  ConstantInt *Cst;
-  Value *X;
-  if (match(CI.getOperand(0), m_Add(m_Cast<PtrToIntInst>(m_Value(X)),
-                                    m_ConstantInt(Cst)))) {
-    // If the source and destination operands have the same type, see if this
-    // is a single-index GEP.
-    if (X->getType() == CI.getType()) {
-      // Get the size of the pointee type.
-      uint64_t Size = TD->getTypeAllocSize(DestPointee);
-
-      // Convert the constant to intptr type.
-      APInt Offset = Cst->getValue();
-      Offset.sextOrTrunc(TD->getPointerSizeInBits());
-
-      // If Offset is evenly divisible by Size, we can do this xform.
-      if (Size && !APIntOps::srem(Offset, APInt(Offset.getBitWidth(), Size))){
-        Offset = APIntOps::sdiv(Offset, APInt(Offset.getBitWidth(), Size));
-        return GetElementPtrInst::Create(X, Context->getConstantInt(Offset));
-      }
-    }
-    // TODO: Could handle other cases, e.g. where add is indexing into field of
-    // struct etc.
-  } else if (CI.getOperand(0)->hasOneUse() &&
-             match(CI.getOperand(0), m_Add(m_Value(X), m_ConstantInt(Cst)))) {
-    // Otherwise, if this is inttoptr(add x, cst), try to turn this into an
-    // "inttoptr+GEP" instead of "add+intptr".
-    
-    // Get the size of the pointee type.
-    uint64_t Size = TD->getTypeAllocSize(DestPointee);
-    
-    // Convert the constant to intptr type.
-    APInt Offset = Cst->getValue();
-    Offset.sextOrTrunc(TD->getPointerSizeInBits());
-    
-    // If Offset is evenly divisible by Size, we can do this xform.
-    if (Size && !APIntOps::srem(Offset, APInt(Offset.getBitWidth(), Size))){
-      Offset = APIntOps::sdiv(Offset, APInt(Offset.getBitWidth(), Size));
-      
-      Instruction *P = InsertNewInstBefore(new IntToPtrInst(X, CI.getType(),
-                                                            "tmp"), CI);
-      return GetElementPtrInst::Create(P,
-                                       Context->getConstantInt(Offset), "tmp");
-    }
-  }
+
   return 0;
 }
 
@@ -8960,10 +8845,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
   const Type *SrcTy = Src->getType();
   const Type *DestTy = CI.getType();
 
-  if (SrcTy->isInteger() && DestTy->isInteger()) {
-    if (Instruction *Result = commonIntCastTransforms(CI))
-      return Result;
-  } else if (isa<PointerType>(SrcTy)) {
+  if (isa<PointerType>(SrcTy)) {
     if (Instruction *I = commonPointerCastTransforms(CI))
       return I;
   } else {
@@ -8987,8 +8869,10 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
     if (SrcPTy->getAddressSpace() != DstPTy->getAddressSpace())
       return 0;
     
-    // If we are casting a malloc or alloca to a pointer to a type of the same
+    // If we are casting a alloca to a pointer to a type of the same
     // size, rewrite the allocation instruction to allocate the "right" type.
+    // There is no need to modify malloc calls because it is their bitcast that
+    // needs to be cleaned up.
     if (AllocationInst *AI = dyn_cast<AllocationInst>(Src))
       if (Instruction *V = PromoteCastOfAllocation(CI, *AI))
         return V;
@@ -8996,7 +8880,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
     // If the source and destination are pointers, and this cast is equivalent
     // to a getelementptr X, 0, 0, 0...  turn it into the appropriate gep.
     // This can enhance SROA and other transforms that want type-safe pointers.
-    Constant *ZeroUInt = Context->getNullValue(Type::Int32Ty);
+    Constant *ZeroUInt = Constant::getNullValue(Type::getInt32Ty(*Context));
     unsigned NumZeros = 0;
     while (SrcElTy != DstElTy && 
            isa<CompositeType>(SrcElTy) && !isa<PointerType>(SrcElTy) &&
@@ -9008,8 +8892,30 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
     // If we found a path from the src to dest, create the getelementptr now.
     if (SrcElTy == DstElTy) {
       SmallVector<Value*, 8> Idxs(NumZeros+1, ZeroUInt);
-      return GetElementPtrInst::Create(Src, Idxs.begin(), Idxs.end(), "", 
-                                       ((Instruction*) NULL));
+      return GetElementPtrInst::CreateInBounds(Src, Idxs.begin(), Idxs.end(), "",
+                                               ((Instruction*) NULL));
+    }
+  }
+
+  if (const VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
+    if (DestVTy->getNumElements() == 1) {
+      if (!isa<VectorType>(SrcTy)) {
+        Value *Elem = Builder->CreateBitCast(Src, DestVTy->getElementType());
+        return InsertElementInst::Create(UndefValue::get(DestTy), Elem,
+                            Constant::getNullValue(Type::getInt32Ty(*Context)));
+      }
+      // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast)
+    }
+  }
+
+  if (const VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) {
+    if (SrcVTy->getNumElements() == 1) {
+      if (!isa<VectorType>(DestTy)) {
+        Value *Elem = 
+          Builder->CreateExtractElement(Src,
+                            Constant::getNullValue(Type::getInt32Ty(*Context)));
+        return CastInst::Create(Instruction::BitCast, Elem, DestTy);
+      }
     }
   }
 
@@ -9030,10 +8936,8 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
              Tmp->getOperand(0)->getType() == DestTy) ||
             ((Tmp = dyn_cast<CastInst>(SVI->getOperand(1))) && 
              Tmp->getOperand(0)->getType() == DestTy)) {
-          Value *LHS = InsertCastBefore(Instruction::BitCast,
-                                        SVI->getOperand(0), DestTy, CI);
-          Value *RHS = InsertCastBefore(Instruction::BitCast,
-                                        SVI->getOperand(1), DestTy, CI);
+          Value *LHS = Builder->CreateBitCast(SVI->getOperand(0), DestTy);
+          Value *RHS = Builder->CreateBitCast(SVI->getOperand(1), DestTy);
           // Return a new shuffle vector.  Use the same element ID's, as we
           // know the vector types match #elts.
           return new ShuffleVectorInst(LHS, RHS, SVI->getOperand(2));
@@ -9076,9 +8980,9 @@ static unsigned GetSelectFoldableOperands(Instruction *I) {
 /// GetSelectFoldableConstant - For the same transformation as the previous
 /// function, return the identity constant that goes into the select.
 static Constant *GetSelectFoldableConstant(Instruction *I,
-                                           LLVMContext* Context) {
+                                           LLVMContext *Context) {
   switch (I->getOpcode()) {
-  default: assert(0 && "This cannot happen!"); abort();
+  default: llvm_unreachable("This cannot happen!");
   case Instruction::Add:
   case Instruction::Sub:
   case Instruction::Or:
@@ -9086,11 +8990,11 @@ static Constant *GetSelectFoldableConstant(Instruction *I,
   case Instruction::Shl:
   case Instruction::LShr:
   case Instruction::AShr:
-    return Context->getNullValue(I->getType());
+    return Constant::getNullValue(I->getType());
   case Instruction::And:
-    return Context->getAllOnesValue(I->getType());
+    return Constant::getAllOnesValue(I->getType());
   case Instruction::Mul:
-    return Context->getConstantInt(I->getType(), 1);
+    return ConstantInt::get(I->getType(), 1);
   }
 }
 
@@ -9110,7 +9014,7 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
 
     // Fold this by inserting a select from the input values.
     SelectInst *NewSI = SelectInst::Create(SI.getCondition(), TI->getOperand(0),
-                                           FI->getOperand(0), SI.getName()+".v");
+                                          FI->getOperand(0), SI.getName()+".v");
     InsertNewInstBefore(NewSI, SI);
     return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI, 
                             TI->getType());
@@ -9160,7 +9064,7 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
     else
       return BinaryOperator::Create(BO->getOpcode(), NewSI, MatchOp);
   }
-  assert(0 && "Shouldn't get here");
+  llvm_unreachable("Shouldn't get here");
   return 0;
 }
 
@@ -9202,7 +9106,7 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
             NewSel->takeName(TVI);
             if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TVI))
               return BinaryOperator::Create(BO->getOpcode(), FalseVal, NewSel);
-            assert(0 && "Unknown instruction!!");
+            llvm_unreachable("Unknown instruction!!");
           }
         }
       }
@@ -9231,7 +9135,7 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
             NewSel->takeName(FVI);
             if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FVI))
               return BinaryOperator::Create(BO->getOpcode(), TrueVal, NewSel);
-            assert(0 && "Unknown instruction!!");
+            llvm_unreachable("Unknown instruction!!");
           }
         }
       }
@@ -9266,7 +9170,7 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
         if (CI->isMinValue(Pred == ICmpInst::ICMP_SLT))
           return ReplaceInstUsesWith(SI, FalseVal);
         // X < C ? X : C-1  -->  X > C-1 ? C-1 : X
-        Constant *AdjustedRHS = SubOne(CI, Context);
+        Constant *AdjustedRHS = SubOne(CI);
         if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) ||
             (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) {
           Pred = ICmpInst::getSwappedPredicate(Pred);
@@ -9286,7 +9190,7 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
         if (CI->isMaxValue(Pred == ICmpInst::ICMP_SGT))
           return ReplaceInstUsesWith(SI, FalseVal);
         // X > C ? X : C+1  -->  X < C+1 ? C+1 : X
-        Constant *AdjustedRHS = AddOne(CI, Context);
+        Constant *AdjustedRHS = AddOne(CI);
         if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) ||
             (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) {
           Pred = ICmpInst::getSwappedPredicate(Pred);
@@ -9323,10 +9227,10 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
         if ((Pred == ICmpInst::ICMP_SLT && Op1CV == 0) ||
             (Pred == ICmpInst::ICMP_SGT && Op1CV.isAllOnesValue())) {
           Value *In = ICI->getOperand(0);
-          Value *Sh = Context->getConstantInt(In->getType(),
+          Value *Sh = ConstantInt::get(In->getType(),
                                        In->getType()->getScalarSizeInBits()-1);
           In = InsertNewInstBefore(BinaryOperator::CreateAShr(In, Sh,
-                                                          In->getName()+".lobit"),
+                                                        In->getName()+".lobit"),
                                    *ICI);
           if (In->getType() != SI.getType())
             In = CastInst::CreateIntegerCast(In, SI.getType(),
@@ -9365,6 +9269,14 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
   return Changed ? &SI : 0;
 }
 
+/// isDefinedInBB - Return true if the value is an instruction defined in the
+/// specified basicblock.
+static bool isDefinedInBB(const Value *V, const BasicBlock *BB) {
+  const Instruction *I = dyn_cast<Instruction>(V);
+  return I != 0 && I->getParent() == BB;
+}
+
+
 Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
   Value *CondVal = SI.getCondition();
   Value *TrueVal = SI.getTrueValue();
@@ -9390,7 +9302,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
       return ReplaceInstUsesWith(SI, FalseVal);
   }
 
-  if (SI.getType() == Type::Int1Ty) {
+  if (SI.getType() == Type::getInt1Ty(*Context)) {
     if (ConstantInt *C = dyn_cast<ConstantInt>(TrueVal)) {
       if (C->getZExtValue()) {
         // Change: A = select B, true, C --> A = or B, C
@@ -9438,26 +9350,6 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
       }
 
       if (ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition())) {
-
-        // (x <s 0) ? -1 : 0 -> ashr x, 31
-        if (TrueValC->isAllOnesValue() && FalseValC->isZero())
-          if (ConstantInt *CmpCst = dyn_cast<ConstantInt>(IC->getOperand(1))) {
-            if (IC->getPredicate() == ICmpInst::ICMP_SLT && CmpCst->isZero()) {
-              // The comparison constant and the result are not neccessarily the
-              // same width. Make an all-ones value by inserting a AShr.
-              Value *X = IC->getOperand(0);
-              uint32_t Bits = X->getType()->getScalarSizeInBits();
-              Constant *ShAmt = Context->getConstantInt(X->getType(), Bits-1);
-              Instruction *SRA = BinaryOperator::Create(Instruction::AShr, X,
-                                                        ShAmt, "ones");
-              InsertNewInstBefore(SRA, SI);
-
-              // Then cast to the appropriate width.
-              return CastInst::CreateIntegerCast(SRA, SI.getType(), true);
-            }
-          }
-
-
         // If one of the constants is zero (we know they can't both be) and we
         // have an icmp instruction with zero, and we have an 'and' with the
         // non-constant value, eliminate this whole mess.  This corresponds to
@@ -9568,10 +9460,11 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
             //        select C, (add X, Y), (sub X, Z)
             Value *NegVal;  // Compute -Z
             if (Constant *C = dyn_cast<Constant>(SubOp->getOperand(1))) {
-              NegVal = Context->getConstantExprNeg(C);
+              NegVal = ConstantExpr::getNeg(C);
             } else {
               NegVal = InsertNewInstBefore(
-                    BinaryOperator::CreateNeg(SubOp->getOperand(1), "tmp"), SI);
+                    BinaryOperator::CreateNeg(SubOp->getOperand(1),
+                                              "tmp"), SI);
             }
 
             Value *NewTrueOp = OtherAddOp;
@@ -9595,6 +9488,17 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
       return FoldI;
   }
 
+  // See if we can fold the select into a phi node.  The true/false values have
+  // to be live in the predecessor blocks.  If they are instructions in SI's
+  // block, we can't map to the predecessor.
+  if (isa<PHINode>(SI.getCondition()) &&
+      (!isDefinedInBB(SI.getTrueValue(), SI.getParent()) ||
+       isa<PHINode>(SI.getTrueValue())) &&
+      (!isDefinedInBB(SI.getFalseValue(), SI.getParent()) ||
+       isa<PHINode>(SI.getFalseValue())))
+    if (Instruction *NV = FoldOpIntoPhi(SI))
+      return NV;
+
   if (BinaryOperator::isNot(CondVal)) {
     SI.setOperand(0, BinaryOperator::getNotArgument(CondVal));
     SI.setOperand(1, FalseVal);
@@ -9617,7 +9521,7 @@ static unsigned EnforceKnownAlignment(Value *V,
   User *U = dyn_cast<User>(V);
   if (!U) return Align;
 
-  switch (getOpcode(U)) {
+  switch (Operator::getOpcode(U)) {
   default: break;
   case Instruction::BitCast:
     return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
@@ -9650,16 +9554,13 @@ static unsigned EnforceKnownAlignment(Value *V,
         Align = PrefAlign;
       }
     }
-  } else if (AllocationInst *AI = dyn_cast<AllocationInst>(V)) {
-    // If there is a requested alignment and if this is an alloca, round up.  We
-    // don't do this for malloc, because some systems can't respect the request.
-    if (isa<AllocaInst>(AI)) {
-      if (AI->getAlignment() >= PrefAlign)
-        Align = AI->getAlignment();
-      else {
-        AI->setAlignment(PrefAlign);
-        Align = PrefAlign;
-      }
+  } else if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+    // If there is a requested alignment and if this is an alloca, round up.
+    if (AI->getAlignment() >= PrefAlign)
+      Align = AI->getAlignment();
+    else {
+      AI->setAlignment(PrefAlign);
+      Align = PrefAlign;
     }
   }
 
@@ -9694,7 +9595,8 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
   unsigned CopyAlign = MI->getAlignment();
 
   if (CopyAlign < MinAlign) {
-    MI->setAlignment(MinAlign);
+    MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), 
+                                             MinAlign, false));
     return MI;
   }
   
@@ -9715,7 +9617,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
   
   // Use an integer load+store unless we can find something better.
   Type *NewPtrTy =
-                Context->getPointerTypeUnqual(Context->getIntegerType(Size<<3));
+                PointerType::getUnqual(IntegerType::get(*Context, Size<<3));
   
   // Memcpy forces the use of i8* for the source and destination.  That means
   // that if you're using memcpy to move one double around, you'll get a cast
@@ -9725,7 +9627,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
   // integer datatype.
   if (Value *Op = getBitCastOperand(MI->getOperand(1))) {
     const Type *SrcETy = cast<PointerType>(Op->getType())->getElementType();
-    if (SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) {
+    if (TD && SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) {
       // The SrcETy might be something like {{{double}}} or [1 x double].  Rip
       // down through these levels if so.
       while (!SrcETy->isSingleValueType()) {
@@ -9744,7 +9646,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
       }
       
       if (SrcETy->isSingleValueType())
-        NewPtrTy = Context->getPointerTypeUnqual(SrcETy);
+        NewPtrTy = PointerType::getUnqual(SrcETy);
     }
   }
   
@@ -9754,28 +9656,29 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
   SrcAlign = std::max(SrcAlign, CopyAlign);
   DstAlign = std::max(DstAlign, CopyAlign);
   
-  Value *Src = InsertBitCastBefore(MI->getOperand(2), NewPtrTy, *MI);
-  Value *Dest = InsertBitCastBefore(MI->getOperand(1), NewPtrTy, *MI);
+  Value *Src = Builder->CreateBitCast(MI->getOperand(2), NewPtrTy);
+  Value *Dest = Builder->CreateBitCast(MI->getOperand(1), NewPtrTy);
   Instruction *L = new LoadInst(Src, "tmp", false, SrcAlign);
   InsertNewInstBefore(L, *MI);
   InsertNewInstBefore(new StoreInst(L, Dest, false, DstAlign), *MI);
 
   // Set the size of the copy to 0, it will be deleted on the next iteration.
-  MI->setOperand(3, Context->getNullValue(MemOpLength->getType()));
+  MI->setOperand(3, Constant::getNullValue(MemOpLength->getType()));
   return MI;
 }
 
 Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
   unsigned Alignment = GetOrEnforceKnownAlignment(MI->getDest());
   if (MI->getAlignment() < Alignment) {
-    MI->setAlignment(Alignment);
+    MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
+                                             Alignment, false));
     return MI;
   }
   
   // Extract the length and alignment and fill if they are constant.
   ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
   ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
-  if (!LenC || !FillC || FillC->getType() != Type::Int8Ty)
+  if (!LenC || !FillC || FillC->getType() != Type::getInt8Ty(*Context))
     return 0;
   uint64_t Len = LenC->getZExtValue();
   Alignment = MI->getAlignment();
@@ -9785,21 +9688,21 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
   
   // memset(s,c,n) -> store s, c (for n=1,2,4,8)
   if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
-    const Type *ITy = Context->getIntegerType(Len*8);  // n=1 -> i8.
+    const Type *ITy = IntegerType::get(*Context, Len*8);  // n=1 -> i8.
     
     Value *Dest = MI->getDest();
-    Dest = InsertBitCastBefore(Dest, Context->getPointerTypeUnqual(ITy), *MI);
+    Dest = Builder->CreateBitCast(Dest, PointerType::getUnqual(ITy));
 
     // Alignment 0 is identity for alignment 1 for memset, but not store.
     if (Alignment == 0) Alignment = 1;
     
     // Extract the fill value and store.
     uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
-    InsertNewInstBefore(new StoreInst(Context->getConstantInt(ITy, Fill),
+    InsertNewInstBefore(new StoreInst(ConstantInt::get(ITy, Fill),
                                       Dest, false, Alignment), *MI);
     
     // Set the size of the copy to 0, it will be deleted on the next iteration.
-    MI->setLength(Context->getNullValue(LenC->getType()));
+    MI->setLength(Constant::getNullValue(LenC->getType()));
     return MI;
   }
 
@@ -9820,8 +9723,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     return &CI;
   }
   
-  
-  
   IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
   if (!II) return visitCallSite(&CI);
   
@@ -9891,9 +9792,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     // Turn PPC lvx     -> load if the pointer is known aligned.
     // Turn X86 loadups -> load if the pointer is known aligned.
     if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
-      Value *Ptr = InsertBitCastBefore(II->getOperand(1),
-                                   Context->getPointerTypeUnqual(II->getType()),
-                                       CI);
+      Value *Ptr = Builder->CreateBitCast(II->getOperand(1),
+                                         PointerType::getUnqual(II->getType()));
       return new LoadInst(Ptr);
     }
     break;
@@ -9902,8 +9802,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     // Turn stvx -> store if the pointer is known aligned.
     if (GetOrEnforceKnownAlignment(II->getOperand(2), 16) >= 16) {
       const Type *OpPtrTy = 
-        Context->getPointerTypeUnqual(II->getOperand(1)->getType());
-      Value *Ptr = InsertBitCastBefore(II->getOperand(2), OpPtrTy, CI);
+        PointerType::getUnqual(II->getOperand(1)->getType());
+      Value *Ptr = Builder->CreateBitCast(II->getOperand(2), OpPtrTy);
       return new StoreInst(II->getOperand(1), Ptr);
     }
     break;
@@ -9913,8 +9813,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     // Turn X86 storeu -> store if the pointer is known aligned.
     if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
       const Type *OpPtrTy = 
-        Context->getPointerTypeUnqual(II->getOperand(2)->getType());
-      Value *Ptr = InsertBitCastBefore(II->getOperand(1), OpPtrTy, CI);
+        PointerType::getUnqual(II->getOperand(2)->getType());
+      Value *Ptr = Builder->CreateBitCast(II->getOperand(1), OpPtrTy);
       return new StoreInst(II->getOperand(2), Ptr);
     }
     break;
@@ -9951,9 +9851,9 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
       
       if (AllEltsOk) {
         // Cast the input vectors to byte vectors.
-        Value *Op0 =InsertBitCastBefore(II->getOperand(1),Mask->getType(),CI);
-        Value *Op1 =InsertBitCastBefore(II->getOperand(2),Mask->getType(),CI);
-        Value *Result = Context->getUndef(Op0->getType());
+        Value *Op0 = Builder->CreateBitCast(II->getOperand(1), Mask->getType());
+        Value *Op1 = Builder->CreateBitCast(II->getOperand(2), Mask->getType());
+        Value *Result = UndefValue::get(Op0->getType());
         
         // Only extract each element once.
         Value *ExtractedElts[32];
@@ -9966,16 +9866,16 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
           Idx &= 31;  // Match the hardware behavior.
           
           if (ExtractedElts[Idx] == 0) {
-            Instruction *Elt = 
-              new ExtractElementInst(Idx < 16 ? Op0 : Op1, Idx&15, "tmp");
-            InsertNewInstBefore(Elt, CI);
-            ExtractedElts[Idx] = Elt;
+            ExtractedElts[Idx] = 
+              Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1, 
+                  ConstantInt::get(Type::getInt32Ty(*Context), Idx&15, false),
+                                            "tmp");
           }
         
           // Insert this value into the result vector.
-          Result = InsertElementInst::Create(Result, ExtractedElts[Idx],
-                                             i, "tmp");
-          InsertNewInstBefore(cast<Instruction>(Result), CI);
+          Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx],
+                         ConstantInt::get(Type::getInt32Ty(*Context), i, false),
+                                                "tmp");
         }
         return CastInst::Create(Instruction::BitCast, Result, CI.getType());
       }
@@ -9999,7 +9899,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     TerminatorInst *TI = II->getParent()->getTerminator();
     bool CannotRemove = false;
     for (++BI; &*BI != TI; ++BI) {
-      if (isa<AllocaInst>(BI)) {
+      if (isa<AllocaInst>(BI) || isMalloc(BI)) {
         CannotRemove = true;
         break;
       }
@@ -10055,7 +9955,7 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS,
   const Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
   if (!SrcTy->isSized() || !DstTy->isSized())
     return false;
-  if (TD->getTypeAllocSize(SrcTy) != TD->getTypeAllocSize(DstTy))
+  if (!TD || TD->getTypeAllocSize(SrcTy) != TD->getTypeAllocSize(DstTy))
     return false;
   return true;
 }
@@ -10076,11 +9976,13 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
       Instruction *OldCall = CS.getInstruction();
       // If the call and callee calling conventions don't match, this call must
       // be unreachable, as the call is undefined.
-      new StoreInst(Context->getConstantIntTrue(),
-                Context->getUndef(Context->getPointerTypeUnqual(Type::Int1Ty)), 
+      new StoreInst(ConstantInt::getTrue(*Context),
+                UndefValue::get(Type::getInt1PtrTy(*Context)), 
                                   OldCall);
-      if (!OldCall->use_empty())
-        OldCall->replaceAllUsesWith(Context->getUndef(OldCall->getType()));
+      // If OldCall dues not return void then replaceAllUsesWith undef.
+      // This allows ValueHandlers and custom metadata to adjust itself.
+      if (!OldCall->getType()->isVoidTy())
+        OldCall->replaceAllUsesWith(UndefValue::get(OldCall->getType()));
       if (isa<CallInst>(OldCall))   // Not worth removing an invoke here.
         return EraseInstFromFunction(*OldCall);
       return 0;
@@ -10090,18 +9992,20 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
     // This instruction is not reachable, just remove it.  We insert a store to
     // undef so that we know that this code is not reachable, despite the fact
     // that we can't modify the CFG here.
-    new StoreInst(Context->getConstantIntTrue(),
-               Context->getUndef(Context->getPointerTypeUnqual(Type::Int1Ty)),
+    new StoreInst(ConstantInt::getTrue(*Context),
+               UndefValue::get(Type::getInt1PtrTy(*Context)),
                   CS.getInstruction());
 
-    if (!CS.getInstruction()->use_empty())
+    // If CS dues not return void then replaceAllUsesWith undef.
+    // This allows ValueHandlers and custom metadata to adjust itself.
+    if (!CS.getInstruction()->getType()->isVoidTy())
       CS.getInstruction()->
-        replaceAllUsesWith(Context->getUndef(CS.getInstruction()->getType()));
+        replaceAllUsesWith(UndefValue::get(CS.getInstruction()->getType()));
 
     if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
       // Don't break the CFG, insert a dummy cond branch.
       BranchInst::Create(II->getNormalDest(), II->getUnwindDest(),
-                         Context->getConstantIntTrue(), II);
+                         ConstantInt::getTrue(*Context), II);
     }
     return EraseInstFromFunction(*CS.getInstruction());
   }
@@ -10165,13 +10069,15 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
     if (Callee->isDeclaration() &&
         // Conversion is ok if changing from one pointer type to another or from
         // a pointer to an integer of the same size.
-        !((isa<PointerType>(OldRetTy) || OldRetTy == TD->getIntPtrType()) &&
-          (isa<PointerType>(NewRetTy) || NewRetTy == TD->getIntPtrType())))
+        !((isa<PointerType>(OldRetTy) || !TD ||
+           OldRetTy == TD->getIntPtrType(Caller->getContext())) &&
+          (isa<PointerType>(NewRetTy) || !TD ||
+           NewRetTy == TD->getIntPtrType(Caller->getContext()))))
       return false;   // Cannot transform this return value.
 
     if (!Caller->use_empty() &&
         // void -> non-void is handled specially
-        NewRetTy != Type::VoidTy && !CastInst::isCastable(NewRetTy, OldRetTy))
+        !NewRetTy->isVoidTy() && !CastInst::isCastable(NewRetTy, OldRetTy))
       return false;   // Cannot transform this return value.
 
     if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
@@ -10212,8 +10118,10 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
     // Converting from one pointer type to another or between a pointer and an
     // integer of the same size is safe even if we do not have a body.
     bool isConvertible = ActTy == ParamTy ||
-      ((isa<PointerType>(ParamTy) || ParamTy == TD->getIntPtrType()) &&
-       (isa<PointerType>(ActTy) || ActTy == TD->getIntPtrType()));
+      (TD && ((isa<PointerType>(ParamTy) ||
+      ParamTy == TD->getIntPtrType(Caller->getContext())) &&
+              (isa<PointerType>(ActTy) ||
+              ActTy == TD->getIntPtrType(Caller->getContext()))));
     if (Callee->isDeclaration() && !isConvertible) return false;
   }
 
@@ -10260,8 +10168,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
     } else {
       Instruction::CastOps opcode = CastInst::getCastOpcode(*AI,
           false, ParamTy, false);
-      CastInst *NewCast = CastInst::Create(opcode, *AI, ParamTy, "tmp");
-      Args.push_back(InsertNewInstBefore(NewCast, *Caller));
+      Args.push_back(Builder->CreateCast(opcode, *AI, ParamTy, "tmp"));
     }
 
     // Add any parameter attributes.
@@ -10270,26 +10177,24 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
   }
 
   // If the function takes more arguments than the call was taking, add them
-  // now...
+  // now.
   for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i)
-    Args.push_back(Context->getNullValue(FT->getParamType(i)));
+    Args.push_back(Constant::getNullValue(FT->getParamType(i)));
 
-  // If we are removing arguments to the function, emit an obnoxious warning...
+  // If we are removing arguments to the function, emit an obnoxious warning.
   if (FT->getNumParams() < NumActualArgs) {
     if (!FT->isVarArg()) {
-      cerr << "WARNING: While resolving call to function '"
-           << Callee->getName() << "' arguments were dropped!\n";
+      errs() << "WARNING: While resolving call to function '"
+             << Callee->getName() << "' arguments were dropped!\n";
     } else {
-      // Add all of the arguments in their promoted form to the arg list...
+      // Add all of the arguments in their promoted form to the arg list.
       for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
         const Type *PTy = getPromotedType((*AI)->getType());
         if (PTy != (*AI)->getType()) {
           // Must promote to pass through va_arg area!
-          Instruction::CastOps opcode = CastInst::getCastOpcode(*AI, false, 
-                                                                PTy, false);
-          Instruction *Cast = CastInst::Create(opcode, *AI, PTy, "tmp");
-          InsertNewInstBefore(Cast, *Caller);
-          Args.push_back(Cast);
+          Instruction::CastOps opcode =
+            CastInst::getCastOpcode(*AI, false, PTy, false);
+          Args.push_back(Builder->CreateCast(opcode, *AI, PTy, "tmp"));
         } else {
           Args.push_back(*AI);
         }
@@ -10304,10 +10209,11 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
   if (Attributes FnAttrs =  CallerPAL.getFnAttributes())
     attrVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
 
-  if (NewRetTy == Type::VoidTy)
+  if (NewRetTy->isVoidTy())
     Caller->setName("");   // Void type should not have a name.
 
-  const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec.begin(),attrVec.end());
+  const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec.begin(),
+                                                     attrVec.end());
 
   Instruction *NC;
   if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
@@ -10329,7 +10235,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
   // Insert a cast of the return type as necessary.
   Value *NV = NC;
   if (OldRetTy != NV->getType() && !Caller->use_empty()) {
-    if (NV->getType() != Type::VoidTy) {
+    if (!NV->getType()->isVoidTy()) {
       Instruction::CastOps opcode = CastInst::getCastOpcode(NC, false, 
                                                             OldRetTy, false);
       NV = NC = CastInst::Create(opcode, NC, OldRetTy, "tmp");
@@ -10343,16 +10249,17 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
         // Otherwise, it's a call, just insert cast right after the call instr
         InsertNewInstBefore(NC, *Caller);
       }
-      AddUsersToWorkList(*Caller);
+      Worklist.AddUsersToWorkList(*Caller);
     } else {
-      NV = Context->getUndef(Caller->getType());
+      NV = UndefValue::get(Caller->getType());
     }
   }
 
-  if (Caller->getType() != Type::VoidTy && !Caller->use_empty())
+
+  if (!Caller->use_empty())
     Caller->replaceAllUsesWith(NV);
-  Caller->eraseFromParent();
-  RemoveFromWorkList(Caller);
+  
+  EraseInstFromFunction(*Caller);
   return true;
 }
 
@@ -10469,14 +10376,14 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
 
       // Replace the trampoline call with a direct call.  Let the generic
       // code sort out any function type mismatches.
-      FunctionType *NewFTy =
-                       Context->getFunctionType(FTy->getReturnType(), NewTypes, 
+      FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes, 
                                                 FTy->isVarArg());
       Constant *NewCallee =
-        NestF->getType() == Context->getPointerTypeUnqual(NewFTy) ?
-        NestF : Context->getConstantExprBitCast(NestF, 
-                                         Context->getPointerTypeUnqual(NewFTy));
-      const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs.begin(),NewAttrs.end());
+        NestF->getType() == PointerType::getUnqual(NewFTy) ?
+        NestF : ConstantExpr::getBitCast(NestF, 
+                                         PointerType::getUnqual(NewFTy));
+      const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs.begin(),
+                                                   NewAttrs.end());
 
       Instruction *NewCaller;
       if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
@@ -10495,10 +10402,10 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
           setCallingConv(cast<CallInst>(Caller)->getCallingConv());
         cast<CallInst>(NewCaller)->setAttributes(NewPAL);
       }
-      if (Caller->getType() != Type::VoidTy && !Caller->use_empty())
+      if (!Caller->getType()->isVoidTy())
         Caller->replaceAllUsesWith(NewCaller);
       Caller->eraseFromParent();
-      RemoveFromWorkList(Caller);
+      Worklist.Remove(Caller);
       return 0;
     }
   }
@@ -10508,13 +10415,13 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
   // code sort out any function type mismatches.
   Constant *NewCallee =
     NestF->getType() == PTy ? NestF : 
-                              Context->getConstantExprBitCast(NestF, PTy);
+                              ConstantExpr::getBitCast(NestF, PTy);
   CS.setCalledFunction(NewCallee);
   return CS.getInstruction();
 }
 
-/// FoldPHIArgBinOpIntoPHI - If we have something like phi [add (a,b), add(c,d)]
-/// and if a/b/c/d and the add's all have a single use, turn this into two phi's
+/// FoldPHIArgBinOpIntoPHI - If we have something like phi [add (a,b), add(a,c)]
+/// and if a/b/c and the add's all have a single use, turn this into a phi
 /// and a single binop.
 Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
   Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0));
@@ -10526,8 +10433,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
   const Type *LHSType = LHSVal->getType();
   const Type *RHSType = RHSVal->getType();
   
-  // Scan to see if all operands are the same opcode, all have one use, and all
-  // kill their operands (i.e. the operands have one use).
+  // Scan to see if all operands are the same opcode, and all have one use.
   for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
     Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i));
     if (!I || I->getOpcode() != Opc || !I->hasOneUse() ||
@@ -10547,6 +10453,13 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
     if (I->getOperand(0) != LHSVal) LHSVal = 0;
     if (I->getOperand(1) != RHSVal) RHSVal = 0;
   }
+
+  // If both LHS and RHS would need a PHI, don't do this transformation,
+  // because it would increase the number of PHIs entering the block,
+  // which leads to higher register pressure. This is especially
+  // bad when the PHIs are in the header of a loop.
+  if (!LHSVal && !RHSVal)
+    return 0;
   
   // Otherwise, this is safe to transform!
   
@@ -10589,8 +10502,8 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
   if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst))
     return BinaryOperator::Create(BinOp->getOpcode(), LHSVal, RHSVal);
   CmpInst *CIOp = cast<CmpInst>(FirstInst);
-  return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), LHSVal,
-                         RHSVal);
+  return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
+                         LHSVal, RHSVal);
 }
 
 Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
@@ -10601,9 +10514,13 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
   // This is true if all GEP bases are allocas and if all indices into them are
   // constants.
   bool AllBasePointersAreAllocas = true;
+
+  // We don't want to replace this phi if the replacement would require
+  // more than one phi, which leads to higher register pressure. This is
+  // especially bad when the PHIs are in the header of a loop.
+  bool NeededPhi = false;
   
-  // Scan to see if all operands are the same opcode, all have one use, and all
-  // kill their operands (i.e. the operands have one use).
+  // Scan to see if all operands are the same opcode, and all have one use.
   for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
     GetElementPtrInst *GEP= dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i));
     if (!GEP || !GEP->hasOneUse() || GEP->getType() != FirstInst->getType() ||
@@ -10632,7 +10549,16 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
       
       if (FirstInst->getOperand(op)->getType() !=GEP->getOperand(op)->getType())
         return 0;
+
+      // If we already needed a PHI for an earlier operand, and another operand
+      // also requires a PHI, we'd be introducing more PHIs than we're
+      // eliminating, which increases register pressure on entry to the PHI's
+      // block.
+      if (NeededPhi)
+        return 0;
+
       FixedOperands[op] = 0;  // Needs a PHI.
+      NeededPhi = true;
     }
   }
   
@@ -10678,8 +10604,11 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
   }
   
   Value *Base = FixedOperands[0];
-  return GetElementPtrInst::Create(Base, FixedOperands.begin()+1,
-                                   FixedOperands.end());
+  return cast<GEPOperator>(FirstInst)->isInBounds() ?
+    GetElementPtrInst::CreateInBounds(Base, FixedOperands.begin()+1,
+                                      FixedOperands.end()) :
+    GetElementPtrInst::Create(Base, FixedOperands.begin()+1,
+                              FixedOperands.end());
 }
 
 
@@ -10836,7 +10765,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
   if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst))
     return BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp);
   if (CmpInst *CIOp = dyn_cast<CmpInst>(FirstInst))
-    return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), 
+    return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
                            PhiVal, ConstantOp);
   assert(isa<LoadInst>(FirstInst) && "Unknown operation");
   
@@ -10929,7 +10858,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
       SmallPtrSet<PHINode*, 16> PotentiallyDeadPHIs;
       PotentiallyDeadPHIs.insert(&PN);
       if (DeadPHICycle(PU, PotentiallyDeadPHIs))
-        return ReplaceInstUsesWith(PN, Context->getUndef(PN.getType()));
+        return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType()));
     }
    
     // If this phi has a single use, and if that use just computes a value for
@@ -10941,7 +10870,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
     if (PHIUser->hasOneUse() &&
         (isa<BinaryOperator>(PHIUser) || isa<GetElementPtrInst>(PHIUser)) &&
         PHIUser->use_back() == &PN) {
-      return ReplaceInstUsesWith(PN, Context->getUndef(PN.getType()));
+      return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType()));
     }
   }
 
@@ -10982,30 +10911,14 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
   return 0;
 }
 
-static Value *InsertCastToIntPtrTy(Value *V, const Type *DTy,
-                                   Instruction *InsertPoint,
-                                   InstCombiner *IC) {
-  unsigned PtrSize = DTy->getScalarSizeInBits();
-  unsigned VTySize = V->getType()->getScalarSizeInBits();
-  // We must cast correctly to the pointer type. Ensure that we
-  // sign extend the integer value if it is smaller as this is
-  // used for address computation.
-  Instruction::CastOps opcode = 
-     (VTySize < PtrSize ? Instruction::SExt :
-      (VTySize == PtrSize ? Instruction::BitCast : Instruction::Trunc));
-  return IC->InsertCastBefore(opcode, V, DTy, *InsertPoint);
-}
-
-
 Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   Value *PtrOp = GEP.getOperand(0);
-  // Is it 'getelementptr %P, i32 0'  or 'getelementptr %P'
-  // If so, eliminate the noop.
+  // Eliminate 'getelementptr %P, i32 0' and 'getelementptr %P', they are noops.
   if (GEP.getNumOperands() == 1)
     return ReplaceInstUsesWith(GEP, PtrOp);
 
   if (isa<UndefValue>(GEP.getOperand(0)))
-    return ReplaceInstUsesWith(GEP, Context->getUndef(GEP.getType()));
+    return ReplaceInstUsesWith(GEP, UndefValue::get(GEP.getType()));
 
   bool HasZeroPointerIndex = false;
   if (Constant *C = dyn_cast<Constant>(GEP.getOperand(1)))
@@ -11015,78 +10928,48 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
     return ReplaceInstUsesWith(GEP, PtrOp);
 
   // Eliminate unneeded casts for indices.
-  bool MadeChange = false;
-  
-  gep_type_iterator GTI = gep_type_begin(GEP);
-  for (User::op_iterator i = GEP.op_begin() + 1, e = GEP.op_end();
-       i != e; ++i, ++GTI) {
-    if (isa<SequentialType>(*GTI)) {
-      if (CastInst *CI = dyn_cast<CastInst>(*i)) {
-        if (CI->getOpcode() == Instruction::ZExt ||
-            CI->getOpcode() == Instruction::SExt) {
-          const Type *SrcTy = CI->getOperand(0)->getType();
-          // We can eliminate a cast from i32 to i64 iff the target 
-          // is a 32-bit pointer target.
-          if (SrcTy->getScalarSizeInBits() >= TD->getPointerSizeInBits()) {
-            MadeChange = true;
-            *i = CI->getOperand(0);
-          }
-        }
-      }
+  if (TD) {
+    bool MadeChange = false;
+    unsigned PtrSize = TD->getPointerSizeInBits();
+    
+    gep_type_iterator GTI = gep_type_begin(GEP);
+    for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end();
+         I != E; ++I, ++GTI) {
+      if (!isa<SequentialType>(*GTI)) continue;
+      
       // If we are using a wider index than needed for this platform, shrink it
-      // to what we need.  If narrower, sign-extend it to what we need.
-      // If the incoming value needs a cast instruction,
-      // insert it.  This explicit cast can make subsequent optimizations more
-      // obvious.
-      Value *Op = *i;
-      if (TD->getTypeSizeInBits(Op->getType()) > TD->getPointerSizeInBits()) {
-        if (Constant *C = dyn_cast<Constant>(Op)) {
-          *i = Context->getConstantExprTrunc(C, TD->getIntPtrType());
-          MadeChange = true;
-        } else {
-          Op = InsertCastBefore(Instruction::Trunc, Op, TD->getIntPtrType(),
-                                GEP);
-          *i = Op;
-          MadeChange = true;
-        }
-      } else if (TD->getTypeSizeInBits(Op->getType()) < TD->getPointerSizeInBits()) {
-        if (Constant *C = dyn_cast<Constant>(Op)) {
-          *i = Context->getConstantExprSExt(C, TD->getIntPtrType());
-          MadeChange = true;
-        } else {
-          Op = InsertCastBefore(Instruction::SExt, Op, TD->getIntPtrType(),
-                                GEP);
-          *i = Op;
-          MadeChange = true;
-        }
-      }
+      // to what we need.  If narrower, sign-extend it to what we need.  This
+      // explicit cast can make subsequent optimizations more obvious.
+      unsigned OpBits = cast<IntegerType>((*I)->getType())->getBitWidth();
+      if (OpBits == PtrSize)
+        continue;
+      
+      *I = Builder->CreateIntCast(*I, TD->getIntPtrType(GEP.getContext()),true);
+      MadeChange = true;
     }
+    if (MadeChange) return &GEP;
   }
-  if (MadeChange) return &GEP;
 
   // Combine Indices - If the source pointer to this getelementptr instruction
   // is a getelementptr instruction, combine the indices of the two
   // getelementptr instructions into a single instruction.
   //
-  SmallVector<Value*, 8> SrcGEPOperands;
-  if (User *Src = dyn_castGetElementPtr(PtrOp))
-    SrcGEPOperands.append(Src->op_begin(), Src->op_end());
-
-  if (!SrcGEPOperands.empty()) {
+  if (GEPOperator *Src = dyn_cast<GEPOperator>(PtrOp)) {
     // Note that if our source is a gep chain itself that we wait for that
     // chain to be resolved before we perform this transformation.  This
     // avoids us creating a TON of code in some cases.
     //
-    if (isa<GetElementPtrInst>(SrcGEPOperands[0]) &&
-        cast<Instruction>(SrcGEPOperands[0])->getNumOperands() == 2)
-      return 0;   // Wait until our source is folded to completion.
+    if (GetElementPtrInst *SrcGEP =
+          dyn_cast<GetElementPtrInst>(Src->getOperand(0)))
+      if (SrcGEP->getNumOperands() == 2)
+        return 0;   // Wait until our source is folded to completion.
 
     SmallVector<Value*, 8> Indices;
 
     // Find out whether the last index in the source GEP is a sequential idx.
     bool EndsWithSequential = false;
-    for (gep_type_iterator I = gep_type_begin(*cast<User>(PtrOp)),
-           E = gep_type_end(*cast<User>(PtrOp)); I != E; ++I)
+    for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src);
+         I != E; ++I)
       EndsWithSequential = !isa<StructType>(*I);
 
     // Can we combine the two pointer arithmetics offsets?
@@ -11094,98 +10977,68 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
       // Replace: gep (gep %P, long B), long A, ...
       // With:    T = long A+B; gep %P, T, ...
       //
-      Value *Sum, *SO1 = SrcGEPOperands.back(), *GO1 = GEP.getOperand(1);
-      if (SO1 == Context->getNullValue(SO1->getType())) {
+      Value *Sum;
+      Value *SO1 = Src->getOperand(Src->getNumOperands()-1);
+      Value *GO1 = GEP.getOperand(1);
+      if (SO1 == Constant::getNullValue(SO1->getType())) {
         Sum = GO1;
-      } else if (GO1 == Context->getNullValue(GO1->getType())) {
+      } else if (GO1 == Constant::getNullValue(GO1->getType())) {
         Sum = SO1;
       } else {
-        // If they aren't the same type, convert both to an integer of the
-        // target's pointer size.
-        if (SO1->getType() != GO1->getType()) {
-          if (Constant *SO1C = dyn_cast<Constant>(SO1)) {
-            SO1 =
-                Context->getConstantExprIntegerCast(SO1C, GO1->getType(), true);
-          } else if (Constant *GO1C = dyn_cast<Constant>(GO1)) {
-            GO1 =
-                Context->getConstantExprIntegerCast(GO1C, SO1->getType(), true);
-          } else {
-            unsigned PS = TD->getPointerSizeInBits();
-            if (TD->getTypeSizeInBits(SO1->getType()) == PS) {
-              // Convert GO1 to SO1's type.
-              GO1 = InsertCastToIntPtrTy(GO1, SO1->getType(), &GEP, this);
-
-            } else if (TD->getTypeSizeInBits(GO1->getType()) == PS) {
-              // Convert SO1 to GO1's type.
-              SO1 = InsertCastToIntPtrTy(SO1, GO1->getType(), &GEP, this);
-            } else {
-              const Type *PT = TD->getIntPtrType();
-              SO1 = InsertCastToIntPtrTy(SO1, PT, &GEP, this);
-              GO1 = InsertCastToIntPtrTy(GO1, PT, &GEP, this);
-            }
-          }
-        }
-        if (isa<Constant>(SO1) && isa<Constant>(GO1))
-          Sum = Context->getConstantExprAdd(cast<Constant>(SO1), 
-                                            cast<Constant>(GO1));
-        else {
-          Sum = BinaryOperator::CreateAdd(SO1, GO1, PtrOp->getName()+".sum");
-          InsertNewInstBefore(cast<Instruction>(Sum), GEP);
-        }
+        // If they aren't the same type, then the input hasn't been processed
+        // by the loop above yet (which canonicalizes sequential index types to
+        // intptr_t).  Just avoid transforming this until the input has been
+        // normalized.
+        if (SO1->getType() != GO1->getType())
+          return 0;
+        Sum = Builder->CreateAdd(SO1, GO1, PtrOp->getName()+".sum");
       }
 
-      // Recycle the GEP we already have if possible.
-      if (SrcGEPOperands.size() == 2) {
-        GEP.setOperand(0, SrcGEPOperands[0]);
+      // Update the GEP in place if possible.
+      if (Src->getNumOperands() == 2) {
+        GEP.setOperand(0, Src->getOperand(0));
         GEP.setOperand(1, Sum);
         return &GEP;
-      } else {
-        Indices.insert(Indices.end(), SrcGEPOperands.begin()+1,
-                       SrcGEPOperands.end()-1);
-        Indices.push_back(Sum);
-        Indices.insert(Indices.end(), GEP.op_begin()+2, GEP.op_end());
       }
+      Indices.append(Src->op_begin()+1, Src->op_end()-1);
+      Indices.push_back(Sum);
+      Indices.append(GEP.op_begin()+2, GEP.op_end());
     } else if (isa<Constant>(*GEP.idx_begin()) &&
                cast<Constant>(*GEP.idx_begin())->isNullValue() &&
-               SrcGEPOperands.size() != 1) {
+               Src->getNumOperands() != 1) {
       // Otherwise we can do the fold if the first index of the GEP is a zero
-      Indices.insert(Indices.end(), SrcGEPOperands.begin()+1,
-                     SrcGEPOperands.end());
-      Indices.insert(Indices.end(), GEP.idx_begin()+1, GEP.idx_end());
+      Indices.append(Src->op_begin()+1, Src->op_end());
+      Indices.append(GEP.idx_begin()+1, GEP.idx_end());
     }
 
     if (!Indices.empty())
-      return GetElementPtrInst::Create(SrcGEPOperands[0], Indices.begin(),
-                                       Indices.end(), GEP.getName());
-
-  } else if (GlobalValue *GV = dyn_cast<GlobalValue>(PtrOp)) {
-    // GEP of global variable.  If all of the indices for this GEP are
-    // constants, we can promote this to a constexpr instead of an instruction.
-
-    // Scan for nonconstants...
-    SmallVector<Constant*, 8> Indices;
-    User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end();
-    for (; I != E && isa<Constant>(*I); ++I)
-      Indices.push_back(cast<Constant>(*I));
-
-    if (I == E) {  // If they are all constants...
-      Constant *CE = Context->getConstantExprGetElementPtr(GV,
-                                                    &Indices[0],Indices.size());
-
-      // Replace all uses of the GEP with the new constexpr...
-      return ReplaceInstUsesWith(GEP, CE);
-    }
-  } else if (Value *X = getBitCastOperand(PtrOp)) {  // Is the operand a cast?
-    if (!isa<PointerType>(X->getType())) {
-      // Not interesting.  Source pointer must be a cast from pointer.
-    } else if (HasZeroPointerIndex) {
-      // transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ...
-      // into     : GEP [10 x i8]* X, i32 0, ...
-      //
-      // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ...
-      //           into     : GEP i8* X, ...
-      // 
-      // This occurs when the program declares an array extern like "int X[];"
+      return (cast<GEPOperator>(&GEP)->isInBounds() &&
+              Src->isInBounds()) ?
+        GetElementPtrInst::CreateInBounds(Src->getOperand(0), Indices.begin(),
+                                          Indices.end(), GEP.getName()) :
+        GetElementPtrInst::Create(Src->getOperand(0), Indices.begin(),
+                                  Indices.end(), GEP.getName());
+  }
+  
+  // Handle gep(bitcast x) and gep(gep x, 0, 0, 0).
+  if (Value *X = getBitCastOperand(PtrOp)) {
+    assert(isa<PointerType>(X->getType()) && "Must be cast from pointer");
+
+    // If the input bitcast is actually "bitcast(bitcast(x))", then we don't 
+    // want to change the gep until the bitcasts are eliminated.
+    if (getBitCastOperand(X)) {
+      Worklist.AddValue(PtrOp);
+      return 0;
+    }
+    
+    // Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ...
+    // into     : GEP [10 x i8]* X, i32 0, ...
+    //
+    // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ...
+    //           into     : GEP i8* X, ...
+    // 
+    // This occurs when the program declares an array extern like "int X[];"
+    if (HasZeroPointerIndex) {
       const PointerType *CPTy = cast<PointerType>(PtrOp->getType());
       const PointerType *XTy = cast<PointerType>(X->getType());
       if (const ArrayType *CATy =
@@ -11194,10 +11047,14 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
         if (CATy->getElementType() == XTy->getElementType()) {
           // -> GEP i8* X, ...
           SmallVector<Value*, 8> Indices(GEP.idx_begin()+1, GEP.idx_end());
-          return GetElementPtrInst::Create(X, Indices.begin(), Indices.end(),
-                                           GEP.getName());
-        } else if (const ArrayType *XATy =
-                 dyn_cast<ArrayType>(XTy->getElementType())) {
+          return cast<GEPOperator>(&GEP)->isInBounds() ?
+            GetElementPtrInst::CreateInBounds(X, Indices.begin(), Indices.end(),
+                                              GEP.getName()) :
+            GetElementPtrInst::Create(X, Indices.begin(), Indices.end(),
+                                      GEP.getName());
+        }
+        
+        if (const ArrayType *XATy = dyn_cast<ArrayType>(XTy->getElementType())){
           // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ?
           if (CATy->getElementType() == XATy->getElementType()) {
             // -> GEP [10 x i8]* X, i32 0, ...
@@ -11216,16 +11073,17 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
       // into:  %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
       const Type *SrcElTy = cast<PointerType>(X->getType())->getElementType();
       const Type *ResElTy=cast<PointerType>(PtrOp->getType())->getElementType();
-      if (isa<ArrayType>(SrcElTy) &&
+      if (TD && isa<ArrayType>(SrcElTy) &&
           TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()) ==
           TD->getTypeAllocSize(ResElTy)) {
         Value *Idx[2];
-        Idx[0] = Context->getNullValue(Type::Int32Ty);
+        Idx[0] = Constant::getNullValue(Type::getInt32Ty(*Context));
         Idx[1] = GEP.getOperand(1);
-        Value *V = InsertNewInstBefore(
-               GetElementPtrInst::Create(X, Idx, Idx + 2, GEP.getName()), GEP);
+        Value *NewGEP = cast<GEPOperator>(&GEP)->isInBounds() ?
+          Builder->CreateInBoundsGEP(X, Idx, Idx + 2, GEP.getName()) :
+          Builder->CreateGEP(X, Idx, Idx + 2, GEP.getName());
         // V and GEP are both pointer types --> BitCast
-        return new BitCastInst(V, GEP.getType());
+        return new BitCastInst(NewGEP, GEP.getType());
       }
       
       // Transform things like:
@@ -11233,7 +11091,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
       //   (where tmp = 8*tmp2) into:
       // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast
       
-      if (isa<ArrayType>(SrcElTy) && ResElTy == Type::Int8Ty) {
+      if (TD && isa<ArrayType>(SrcElTy) && ResElTy == Type::getInt8Ty(*Context)) {
         uint64_t ArrayEltSize =
             TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType());
         
@@ -11243,17 +11101,16 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
         ConstantInt *Scale = 0;
         if (ArrayEltSize == 1) {
           NewIdx = GEP.getOperand(1);
-          Scale = 
-               Context->getConstantInt(cast<IntegerType>(NewIdx->getType()), 1);
+          Scale = ConstantInt::get(cast<IntegerType>(NewIdx->getType()), 1);
         } else if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP.getOperand(1))) {
-          NewIdx = Context->getConstantInt(CI->getType(), 1);
+          NewIdx = ConstantInt::get(CI->getType(), 1);
           Scale = CI;
         } else if (Instruction *Inst =dyn_cast<Instruction>(GEP.getOperand(1))){
           if (Inst->getOpcode() == Instruction::Shl &&
               isa<ConstantInt>(Inst->getOperand(1))) {
             ConstantInt *ShAmt = cast<ConstantInt>(Inst->getOperand(1));
             uint32_t ShAmtVal = ShAmt->getLimitedValue(64);
-            Scale = Context->getConstantInt(cast<IntegerType>(Inst->getType()),
+            Scale = ConstantInt::get(cast<IntegerType>(Inst->getType()),
                                      1ULL << ShAmtVal);
             NewIdx = Inst->getOperand(0);
           } else if (Inst->getOpcode() == Instruction::Mul &&
@@ -11269,23 +11126,21 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
         // operation after making sure Scale doesn't have the sign bit set.
         if (ArrayEltSize && Scale && Scale->getSExtValue() >= 0LL &&
             Scale->getZExtValue() % ArrayEltSize == 0) {
-          Scale = Context->getConstantInt(Scale->getType(),
+          Scale = ConstantInt::get(Scale->getType(),
                                    Scale->getZExtValue() / ArrayEltSize);
           if (Scale->getZExtValue() != 1) {
-            Constant *C =
-                   Context->getConstantExprIntegerCast(Scale, NewIdx->getType(),
+            Constant *C = ConstantExpr::getIntegerCast(Scale, NewIdx->getType(),
                                                        false /*ZExt*/);
-            Instruction *Sc = BinaryOperator::CreateMul(NewIdx, C, "idxscale");
-            NewIdx = InsertNewInstBefore(Sc, GEP);
+            NewIdx = Builder->CreateMul(NewIdx, C, "idxscale");
           }
 
           // Insert the new GEP instruction.
           Value *Idx[2];
-          Idx[0] = Context->getNullValue(Type::Int32Ty);
+          Idx[0] = Constant::getNullValue(Type::getInt32Ty(*Context));
           Idx[1] = NewIdx;
-          Instruction *NewGEP =
-            GetElementPtrInst::Create(X, Idx, Idx + 2, GEP.getName());
-          NewGEP = InsertNewInstBefore(NewGEP, GEP);
+          Value *NewGEP = cast<GEPOperator>(&GEP)->isInBounds() ?
+            Builder->CreateInBoundsGEP(X, Idx, Idx + 2, GEP.getName()) :
+            Builder->CreateGEP(X, Idx, Idx + 2, GEP.getName());
           // The NewGEP must be pointer typed, so must the old one -> BitCast
           return new BitCastInst(NewGEP, GEP.getType());
         }
@@ -11294,12 +11149,13 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   }
   
   /// See if we can simplify:
-  ///   X = bitcast A to B*
+  ///   X = bitcast A* to B*
   ///   Y = gep X, <...constant indices...>
   /// into a gep of the original struct.  This is important for SROA and alias
   /// analysis of unions.  If "A" is also a bitcast, wait for A/X to be merged.
   if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) {
-    if (!isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices()) {
+    if (TD &&
+        !isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices()) {
       // Determine how much the GEP moves the pointer.  We are guaranteed to get
       // a constant back from EmitGEPOffset.
       ConstantInt *OffsetV =
@@ -11311,7 +11167,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
       if (Offset == 0) {
         // If the bitcast is of an allocation, and the allocation will be
         // converted to match the type of the cast, don't touch this.
-        if (isa<AllocationInst>(BCI->getOperand(0))) {
+        if (isa<AllocationInst>(BCI->getOperand(0)) ||
+            isMalloc(BCI->getOperand(0))) {
           // See if the bitcast simplifies, if so, don't nuke this GEP yet.
           if (Instruction *I = visitBitCast(*BCI)) {
             if (I != BCI) {
@@ -11332,11 +11189,14 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
       const Type *InTy =
         cast<PointerType>(BCI->getOperand(0)->getType())->getElementType();
       if (FindElementAtOffset(InTy, Offset, NewIndices, TD, Context)) {
-        Instruction *NGEP =
-           GetElementPtrInst::Create(BCI->getOperand(0), NewIndices.begin(),
-                                     NewIndices.end());
-        if (NGEP->getType() == GEP.getType()) return NGEP;
-        InsertNewInstBefore(NGEP, GEP);
+        Value *NGEP = cast<GEPOperator>(&GEP)->isInBounds() ?
+          Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices.begin(),
+                                     NewIndices.end()) :
+          Builder->CreateGEP(BCI->getOperand(0), NewIndices.begin(),
+                             NewIndices.end());
+        
+        if (NGEP->getType() == GEP.getType())
+          return ReplaceInstUsesWith(GEP, NGEP);
         NGEP->takeName(&GEP);
         return new BitCastInst(NGEP, GEP.getType());
       }
@@ -11351,18 +11211,17 @@ Instruction *InstCombiner::visitAllocationInst(AllocationInst &AI) {
   if (AI.isArrayAllocation()) {  // Check C != 1
     if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
       const Type *NewTy = 
-        Context->getArrayType(AI.getAllocatedType(), C->getZExtValue());
+        ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
       AllocationInst *New = 0;
 
       // Create and insert the replacement instruction...
       if (isa<MallocInst>(AI))
-        New = new MallocInst(NewTy, 0, AI.getAlignment(), AI.getName());
+        New = Builder->CreateMalloc(NewTy, 0, AI.getName());
       else {
         assert(isa<AllocaInst>(AI) && "Unknown type of allocation inst!");
-        New = new AllocaInst(NewTy, 0, AI.getAlignment(), AI.getName());
+        New = Builder->CreateAlloca(NewTy, 0, AI.getName());
       }
-
-      InsertNewInstBefore(New, AI);
+      New->setAlignment(AI.getAlignment());
 
       // Scan to the end of the allocation instructions, to skip over a block of
       // allocas if possible...also skip interleaved debug info
@@ -11373,27 +11232,27 @@ Instruction *InstCombiner::visitAllocationInst(AllocationInst &AI) {
       // Now that I is pointing to the first non-allocation-inst in the block,
       // insert our getelementptr instruction...
       //
-      Value *NullIdx = Context->getNullValue(Type::Int32Ty);
+      Value *NullIdx = Constant::getNullValue(Type::getInt32Ty(*Context));
       Value *Idx[2];
       Idx[0] = NullIdx;
       Idx[1] = NullIdx;
-      Value *V = GetElementPtrInst::Create(New, Idx, Idx + 2,
-                                           New->getName()+".sub", It);
+      Value *V = GetElementPtrInst::CreateInBounds(New, Idx, Idx + 2,
+                                                   New->getName()+".sub", It);
 
       // Now make everything use the getelementptr instead of the original
       // allocation.
       return ReplaceInstUsesWith(AI, V);
     } else if (isa<UndefValue>(AI.getArraySize())) {
-      return ReplaceInstUsesWith(AI, Context->getNullValue(AI.getType()));
+      return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
     }
   }
 
-  if (isa<AllocaInst>(AI) && AI.getAllocatedType()->isSized()) {
+  if (TD && isa<AllocaInst>(AI) && AI.getAllocatedType()->isSized()) {
     // If alloca'ing a zero byte object, replace the alloca with a null pointer.
     // Note that we only do this for alloca's, because malloc should allocate
     // and return a unique pointer, even for a zero byte allocation.
     if (TD->getTypeAllocSize(AI.getAllocatedType()) == 0)
-      return ReplaceInstUsesWith(AI, Context->getNullValue(AI.getType()));
+      return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
 
     // If the alignment is 0 (unspecified), assign it the preferred alignment.
     if (AI.getAlignment() == 0)
@@ -11409,8 +11268,8 @@ Instruction *InstCombiner::visitFreeInst(FreeInst &FI) {
   // free undef -> unreachable.
   if (isa<UndefValue>(Op)) {
     // Insert a new store to null because we cannot modify the CFG here.
-    new StoreInst(Context->getConstantIntTrue(),
-           Context->getUndef(Context->getPointerTypeUnqual(Type::Int1Ty)), &FI);
+    new StoreInst(ConstantInt::getTrue(*Context),
+           UndefValue::get(Type::getInt1PtrTy(*Context)), &FI);
     return EraseInstFromFunction(FI);
   }
   
@@ -11428,7 +11287,7 @@ Instruction *InstCombiner::visitFreeInst(FreeInst &FI) {
   // Change free (gep X, 0,0,0,0) into free(X)
   if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
     if (GEPI->hasAllZeroIndices()) {
-      AddToWorkList(GEPI);
+      Worklist.Add(GEPI);
       FI.setOperand(0, GEPI->getOperand(0));
       return &FI;
     }
@@ -11440,6 +11299,21 @@ Instruction *InstCombiner::visitFreeInst(FreeInst &FI) {
       EraseInstFromFunction(FI);
       return EraseInstFromFunction(*MI);
     }
+  if (isMalloc(Op)) {
+    if (CallInst* CI = extractMallocCallFromBitCast(Op)) {
+      if (Op->hasOneUse() && CI->hasOneUse()) {
+        EraseInstFromFunction(FI);
+        EraseInstFromFunction(*CI);
+        return EraseInstFromFunction(*cast<Instruction>(Op));
+      }
+    } else {
+      // Op is a call to malloc
+      if (Op->hasOneUse()) {
+        EraseInstFromFunction(FI);
+        return EraseInstFromFunction(*cast<Instruction>(Op));
+      }
+    }
+  }
 
   return 0;
 }
@@ -11450,7 +11324,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
                                         const TargetData *TD) {
   User *CI = cast<User>(LI.getOperand(0));
   Value *CastOp = CI->getOperand(0);
-  LLVMContext* Context = IC.getContext();
+  LLVMContext *Context = IC.getContext();
 
   if (TD) {
     if (ConstantExpr *CE = dyn_cast<ConstantExpr>(CI)) {
@@ -11479,7 +11353,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
             SingleChar = 0;
             StrVal = (StrVal << 8) | SingleChar;
           }
-          Value *NL = Context->getConstantInt(StrVal);
+          Value *NL = ConstantInt::get(*Context, StrVal);
           return IC.ReplaceInstUsesWith(LI, NL);
         }
       }
@@ -11505,26 +11379,26 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
         if (Constant *CSrc = dyn_cast<Constant>(CastOp))
           if (ASrcTy->getNumElements() != 0) {
             Value *Idxs[2];
-            Idxs[0] = Idxs[1] = Context->getNullValue(Type::Int32Ty);
-            CastOp = Context->getConstantExprGetElementPtr(CSrc, Idxs, 2);
+            Idxs[0] = Idxs[1] = Constant::getNullValue(Type::getInt32Ty(*Context));
+            CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs, 2);
             SrcTy = cast<PointerType>(CastOp->getType());
             SrcPTy = SrcTy->getElementType();
           }
 
-      if ((SrcPTy->isInteger() || isa<PointerType>(SrcPTy) || 
+      if (IC.getTargetData() &&
+          (SrcPTy->isInteger() || isa<PointerType>(SrcPTy) || 
             isa<VectorType>(SrcPTy)) &&
           // Do not allow turning this into a load of an integer, which is then
           // casted to a pointer, this pessimizes pointer analysis a lot.
           (isa<PointerType>(SrcPTy) == isa<PointerType>(LI.getType())) &&
-          IC.getTargetData().getTypeSizeInBits(SrcPTy) ==
-               IC.getTargetData().getTypeSizeInBits(DestPTy)) {
+          IC.getTargetData()->getTypeSizeInBits(SrcPTy) ==
+               IC.getTargetData()->getTypeSizeInBits(DestPTy)) {
 
         // Okay, we are casting from one integer or pointer type to another of
         // the same size.  Instead of casting the pointer before the load, cast
         // the result of the loaded value.
-        Value *NewLoad = IC.InsertNewInstBefore(new LoadInst(CastOp,
-                                                             CI->getName(),
-                                                         LI.isVolatile()),LI);
+        Value *NewLoad = 
+          IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName());
         // Now cast the result of the load.
         return new BitCastInst(NewLoad, LI.getType());
       }
@@ -11537,14 +11411,16 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
   Value *Op = LI.getOperand(0);
 
   // Attempt to improve the alignment.
-  unsigned KnownAlign =
-    GetOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType()));
-  if (KnownAlign >
-      (LI.getAlignment() == 0 ? TD->getABITypeAlignment(LI.getType()) :
-                                LI.getAlignment()))
-    LI.setAlignment(KnownAlign);
-
-  // load (cast X) --> cast (load X) iff safe
+  if (TD) {
+    unsigned KnownAlign =
+      GetOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType()));
+    if (KnownAlign >
+        (LI.getAlignment() == 0 ? TD->getABITypeAlignment(LI.getType()) :
+                                  LI.getAlignment()))
+      LI.setAlignment(KnownAlign);
+  }
+
+  // load (cast X) --> cast (load X) iff safe.
   if (isa<CastInst>(Op))
     if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
       return Res;
@@ -11562,29 +11438,28 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
   if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
     const Value *GEPI0 = GEPI->getOperand(0);
     // TODO: Consider a target hook for valid address spaces for this xform.
-    if (isa<ConstantPointerNull>(GEPI0) &&
-        cast<PointerType>(GEPI0->getType())->getAddressSpace() == 0) {
+    if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0){
       // Insert a new store to null instruction before the load to indicate
       // that this code is not reachable.  We do this instead of inserting
       // an unreachable instruction directly because we cannot modify the
       // CFG.
-      new StoreInst(Context->getUndef(LI.getType()),
-                    Context->getNullValue(Op->getType()), &LI);
-      return ReplaceInstUsesWith(LI, Context->getUndef(LI.getType()));
+      new StoreInst(UndefValue::get(LI.getType()),
+                    Constant::getNullValue(Op->getType()), &LI);
+      return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
     }
   } 
 
   if (Constant *C = dyn_cast<Constant>(Op)) {
     // load null/undef -> undef
     // TODO: Consider a target hook for valid address spaces for this xform.
-    if (isa<UndefValue>(C) || (C->isNullValue() && 
-        cast<PointerType>(Op->getType())->getAddressSpace() == 0)) {
+    if (isa<UndefValue>(C) ||
+        (C->isNullValue() && LI.getPointerAddressSpace() == 0)) {
       // Insert a new store to null instruction before the load to indicate that
       // this code is not reachable.  We do this instead of inserting an
       // unreachable instruction directly because we cannot modify the CFG.
-      new StoreInst(Context->getUndef(LI.getType()),
-                    Context->getNullValue(Op->getType()), &LI);
-      return ReplaceInstUsesWith(LI, Context->getUndef(LI.getType()));
+      new StoreInst(UndefValue::get(LI.getType()),
+                    Constant::getNullValue(Op->getType()), &LI);
+      return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
     }
 
     // Instcombine load (constant global) into the value loaded.
@@ -11605,9 +11480,9 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
           // that this code is not reachable.  We do this instead of inserting
           // an unreachable instruction directly because we cannot modify the
           // CFG.
-          new StoreInst(Context->getUndef(LI.getType()),
-                        Context->getNullValue(Op->getType()), &LI);
-          return ReplaceInstUsesWith(LI, Context->getUndef(LI.getType()));
+          new StoreInst(UndefValue::get(LI.getType()),
+                        Constant::getNullValue(Op->getType()), &LI);
+          return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
         }
 
       } else if (CE->isCast()) {
@@ -11622,9 +11497,9 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op->getUnderlyingObject())){
     if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
       if (GV->getInitializer()->isNullValue())
-        return ReplaceInstUsesWith(LI, Context->getNullValue(LI.getType()));
+        return ReplaceInstUsesWith(LI, Constant::getNullValue(LI.getType()));
       else if (isa<UndefValue>(GV->getInitializer()))
-        return ReplaceInstUsesWith(LI, Context->getUndef(LI.getType()));
+        return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
     }
   }
 
@@ -11643,10 +11518,10 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
       // load (select (Cond, &V1, &V2))  --> select(Cond, load &V1, load &V2).
       if (isSafeToLoadUnconditionally(SI->getOperand(1), SI) &&
           isSafeToLoadUnconditionally(SI->getOperand(2), SI)) {
-        Value *V1 = InsertNewInstBefore(new LoadInst(SI->getOperand(1),
-                                     SI->getOperand(1)->getName()+".val"), LI);
-        Value *V2 = InsertNewInstBefore(new LoadInst(SI->getOperand(2),
-                                     SI->getOperand(2)->getName()+".val"), LI);
+        Value *V1 = Builder->CreateLoad(SI->getOperand(1),
+                                        SI->getOperand(1)->getName()+".val");
+        Value *V2 = Builder->CreateLoad(SI->getOperand(2),
+                                        SI->getOperand(2)->getName()+".val");
         return SelectInst::Create(SI->getCondition(), V1, V2);
       }
 
@@ -11674,7 +11549,6 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
 static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
   User *CI = cast<User>(SI.getOperand(1));
   Value *CastOp = CI->getOperand(0);
-  LLVMContext* Context = IC.getContext();
 
   const Type *DestPTy = cast<PointerType>(CI->getType())->getElementType();
   const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType());
@@ -11696,7 +11570,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
   // constants.
   if (isa<ArrayType>(SrcPTy) || isa<StructType>(SrcPTy)) {
     // Index through pointer.
-    Constant *Zero = Context->getNullValue(Type::Int32Ty);
+    Constant *Zero = Constant::getNullValue(Type::getInt32Ty(*IC.getContext()));
     NewGEPIndices.push_back(Zero);
     
     while (1) {
@@ -11713,7 +11587,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
       }
     }
     
-    SrcTy = Context->getPointerType(SrcPTy, SrcTy->getAddressSpace());
+    SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace());
   }
 
   if (!SrcPTy->isInteger() && !isa<PointerType>(SrcPTy))
@@ -11721,10 +11595,11 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
   
   // If the pointers point into different address spaces or if they point to
   // values with different sizes, we can't do the transformation.
-  if (SrcTy->getAddressSpace() != 
+  if (!IC.getTargetData() ||
+      SrcTy->getAddressSpace() != 
         cast<PointerType>(CI->getType())->getAddressSpace() ||
-      IC.getTargetData().getTypeSizeInBits(SrcPTy) !=
-      IC.getTargetData().getTypeSizeInBits(DestPTy))
+      IC.getTargetData()->getTypeSizeInBits(SrcPTy) !=
+      IC.getTargetData()->getTypeSizeInBits(DestPTy))
     return 0;
 
   // Okay, we are casting from one integer or pointer type to another of
@@ -11745,22 +11620,12 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
   
   // SIOp0 is a pointer to aggregate and this is a store to the first field,
   // emit a GEP to index into its first field.
-  if (!NewGEPIndices.empty()) {
-    if (Constant *C = dyn_cast<Constant>(CastOp))
-      CastOp = Context->getConstantExprGetElementPtr(C, &NewGEPIndices[0], 
-                                              NewGEPIndices.size());
-    else
-      CastOp = IC.InsertNewInstBefore(
-              GetElementPtrInst::Create(CastOp, NewGEPIndices.begin(),
-                                        NewGEPIndices.end()), SI);
-  }
+  if (!NewGEPIndices.empty())
+    CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices.begin(),
+                                           NewGEPIndices.end());
   
-  if (Constant *C = dyn_cast<Constant>(SIOp0))
-    NewCast = Context->getConstantExprCast(opcode, C, CastDstTy);
-  else
-    NewCast = IC.InsertNewInstBefore(
-      CastInst::Create(opcode, SIOp0, CastDstTy, SIOp0->getName()+".c"), 
-      SI);
+  NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
+                                   SIOp0->getName()+".c");
   return new StoreInst(NewCast, CastOp);
 }
 
@@ -11777,12 +11642,16 @@ static bool equivalentAddressValues(Value *A, Value *B) {
   if (A == B) return true;
   
   // Test if the values come form identical arithmetic instructions.
+  // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
+  // its only used to compare two uses within the same basic block, which
+  // means that they'll always either have the same value or one of them
+  // will have an undefined value.
   if (isa<BinaryOperator>(A) ||
       isa<CastInst>(A) ||
       isa<PHINode>(A) ||
       isa<GetElementPtrInst>(A))
     if (Instruction *BI = dyn_cast<Instruction>(B))
-      if (cast<Instruction>(A)->isIdenticalTo(BI))
+      if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
         return true;
   
   // Otherwise they may not be equivalent.
@@ -11854,12 +11723,14 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
   }
 
   // Attempt to improve the alignment.
-  unsigned KnownAlign =
-    GetOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()));
-  if (KnownAlign >
-      (SI.getAlignment() == 0 ? TD->getABITypeAlignment(Val->getType()) :
-                                SI.getAlignment()))
-    SI.setAlignment(KnownAlign);
+  if (TD) {
+    unsigned KnownAlign =
+      GetOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()));
+    if (KnownAlign >
+        (SI.getAlignment() == 0 ? TD->getABITypeAlignment(Val->getType()) :
+                                  SI.getAlignment()))
+      SI.setAlignment(KnownAlign);
+  }
 
   // Do really simple DSE, to catch cases where there are several consecutive
   // stores to the same location, separated by a few arithmetic operations. This
@@ -11914,12 +11785,11 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
   if (SI.isVolatile()) return 0;  // Don't hack volatile stores.
 
   // store X, null    -> turns into 'unreachable' in SimplifyCFG
-  if (isa<ConstantPointerNull>(Ptr) &&
-      cast<PointerType>(Ptr->getType())->getAddressSpace() == 0) {
+  if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) {
     if (!isa<UndefValue>(Val)) {
-      SI.setOperand(0, Context->getUndef(Val->getType()));
+      SI.setOperand(0, UndefValue::get(Val->getType()));
       if (Instruction *U = dyn_cast<Instruction>(Val))
-        AddToWorkList(U);  // Dropped a use.
+        Worklist.Add(U);  // Dropped a use.
       ++NumCombined;
     }
     return 0;  // Do not modify these!
@@ -12096,41 +11966,34 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
   // Cannonicalize fcmp_one -> fcmp_oeq
   FCmpInst::Predicate FPred; Value *Y;
   if (match(&BI, m_Br(m_FCmp(FPred, m_Value(X), m_Value(Y)), 
-                             TrueDest, FalseDest)))
-    if ((FPred == FCmpInst::FCMP_ONE || FPred == FCmpInst::FCMP_OLE ||
-         FPred == FCmpInst::FCMP_OGE) && BI.getCondition()->hasOneUse()) {
-      FCmpInst *I = cast<FCmpInst>(BI.getCondition());
-      FCmpInst::Predicate NewPred = FCmpInst::getInversePredicate(FPred);
-      Instruction *NewSCC = new FCmpInst(NewPred, X, Y, "", I);
-      NewSCC->takeName(I);
-      // Swap Destinations and condition...
-      BI.setCondition(NewSCC);
+                             TrueDest, FalseDest)) &&
+      BI.getCondition()->hasOneUse())
+    if (FPred == FCmpInst::FCMP_ONE || FPred == FCmpInst::FCMP_OLE ||
+        FPred == FCmpInst::FCMP_OGE) {
+      FCmpInst *Cond = cast<FCmpInst>(BI.getCondition());
+      Cond->setPredicate(FCmpInst::getInversePredicate(FPred));
+      
+      // Swap Destinations and condition.
       BI.setSuccessor(0, FalseDest);
       BI.setSuccessor(1, TrueDest);
-      RemoveFromWorkList(I);
-      I->eraseFromParent();
-      AddToWorkList(NewSCC);
+      Worklist.Add(Cond);
       return &BI;
     }
 
   // Cannonicalize icmp_ne -> icmp_eq
   ICmpInst::Predicate IPred;
   if (match(&BI, m_Br(m_ICmp(IPred, m_Value(X), m_Value(Y)),
-                      TrueDest, FalseDest)))
-    if ((IPred == ICmpInst::ICMP_NE  || IPred == ICmpInst::ICMP_ULE ||
-         IPred == ICmpInst::ICMP_SLE || IPred == ICmpInst::ICMP_UGE ||
-         IPred == ICmpInst::ICMP_SGE) && BI.getCondition()->hasOneUse()) {
-      ICmpInst *I = cast<ICmpInst>(BI.getCondition());
-      ICmpInst::Predicate NewPred = ICmpInst::getInversePredicate(IPred);
-      Instruction *NewSCC = new ICmpInst(NewPred, X, Y, "", I);
-      NewSCC->takeName(I);
-      // Swap Destinations and condition...
-      BI.setCondition(NewSCC);
+                      TrueDest, FalseDest)) &&
+      BI.getCondition()->hasOneUse())
+    if (IPred == ICmpInst::ICMP_NE  || IPred == ICmpInst::ICMP_ULE ||
+        IPred == ICmpInst::ICMP_SLE || IPred == ICmpInst::ICMP_UGE ||
+        IPred == ICmpInst::ICMP_SGE) {
+      ICmpInst *Cond = cast<ICmpInst>(BI.getCondition());
+      Cond->setPredicate(ICmpInst::getInversePredicate(IPred));
+      // Swap Destinations and condition.
       BI.setSuccessor(0, FalseDest);
       BI.setSuccessor(1, TrueDest);
-      RemoveFromWorkList(I);
-      I->eraseFromParent();;
-      AddToWorkList(NewSCC);
+      Worklist.Add(Cond);
       return &BI;
     }
 
@@ -12145,10 +12008,10 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
         // change 'switch (X+4) case 1:' into 'switch (X) case -3'
         for (unsigned i = 2, e = SI.getNumOperands(); i != e; i += 2)
           SI.setOperand(i,
-                   Context->getConstantExprSub(cast<Constant>(SI.getOperand(i)),
+                   ConstantExpr::getSub(cast<Constant>(SI.getOperand(i)),
                                                 AddRHS));
         SI.setOperand(0, I->getOperand(0));
-        AddToWorkList(I);
+        Worklist.Add(I);
         return &SI;
       }
   }
@@ -12163,10 +12026,10 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
 
   if (Constant *C = dyn_cast<Constant>(Agg)) {
     if (isa<UndefValue>(C))
-      return ReplaceInstUsesWith(EV, Context->getUndef(EV.getType()));
+      return ReplaceInstUsesWith(EV, UndefValue::get(EV.getType()));
       
     if (isa<ConstantAggregateZero>(C))
-      return ReplaceInstUsesWith(EV, Context->getNullValue(EV.getType()));
+      return ReplaceInstUsesWith(EV, Constant::getNullValue(EV.getType()));
 
     if (isa<ConstantArray>(C) || isa<ConstantStruct>(C)) {
       // Extract the element indexed by the first index out of the constant
@@ -12214,10 +12077,8 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
       // %E = insertvalue { i32 } %X, i32 42, 0
       // by switching the order of the insert and extract (though the
       // insertvalue should be left in, since it may have other uses).
-      Value *NewEV = InsertNewInstBefore(
-        ExtractValueInst::Create(IV->getAggregateOperand(),
-                                 EV.idx_begin(), EV.idx_end()),
-        EV);
+      Value *NewEV = Builder->CreateExtractValue(IV->getAggregateOperand(),
+                                                 EV.idx_begin(), EV.idx_end());
       return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
                                      insi, inse);
     }
@@ -12303,17 +12164,17 @@ static std::vector<unsigned> getShuffleMask(const ShuffleVectorInst *SVI) {
 /// value is already around as a register, for example if it were inserted then
 /// extracted from the vector.
 static Value *FindScalarElement(Value *V, unsigned EltNo,
-                                LLVMContext* Context) {
+                                LLVMContext *Context) {
   assert(isa<VectorType>(V->getType()) && "Not looking at a vector?");
   const VectorType *PTy = cast<VectorType>(V->getType());
   unsigned Width = PTy->getNumElements();
   if (EltNo >= Width)  // Out of range access.
-    return Context->getUndef(PTy->getElementType());
+    return UndefValue::get(PTy->getElementType());
   
   if (isa<UndefValue>(V))
-    return Context->getUndef(PTy->getElementType());
+    return UndefValue::get(PTy->getElementType());
   else if (isa<ConstantAggregateZero>(V))
-    return Context->getNullValue(PTy->getElementType());
+    return Constant::getNullValue(PTy->getElementType());
   else if (ConstantVector *CP = dyn_cast<ConstantVector>(V))
     return CP->getOperand(EltNo);
   else if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
@@ -12339,7 +12200,7 @@ static Value *FindScalarElement(Value *V, unsigned EltNo,
     else if (InEl < LHSWidth*2)
       return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth, Context);
     else
-      return Context->getUndef(PTy->getElementType());
+      return UndefValue::get(PTy->getElementType());
   }
   
   // Otherwise, we don't know.
@@ -12349,18 +12210,18 @@ static Value *FindScalarElement(Value *V, unsigned EltNo,
 Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
   // If vector val is undef, replace extract with scalar undef.
   if (isa<UndefValue>(EI.getOperand(0)))
-    return ReplaceInstUsesWith(EI, Context->getUndef(EI.getType()));
+    return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
 
   // If vector val is constant 0, replace extract with scalar 0.
   if (isa<ConstantAggregateZero>(EI.getOperand(0)))
-    return ReplaceInstUsesWith(EI, Context->getNullValue(EI.getType()));
+    return ReplaceInstUsesWith(EI, Constant::getNullValue(EI.getType()));
   
   if (ConstantVector *C = dyn_cast<ConstantVector>(EI.getOperand(0))) {
     // If vector val is constant with all elements the same, replace EI with
     // that element. When the elements are not identical, we cannot replace yet
     // (we do that below, but only when the index is constant).
     Constant *op0 = C->getOperand(0);
-    for (unsigned i = 1; i < C->getNumOperands(); ++i)
+    for (unsigned i = 1; i != C->getNumOperands(); ++i)
       if (C->getOperand(i) != op0) {
         op0 = 0; 
         break;
@@ -12373,13 +12234,12 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
   // find a previously computed scalar that was inserted into the vector.
   if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) {
     unsigned IndexVal = IdxC->getZExtValue();
-    unsigned VectorWidth = 
-      cast<VectorType>(EI.getOperand(0)->getType())->getNumElements();
+    unsigned VectorWidth = EI.getVectorOperandType()->getNumElements();
       
     // If this is extracting an invalid index, turn this into undef, to avoid
     // crashing the code below.
     if (IndexVal >= VectorWidth)
-      return ReplaceInstUsesWith(EI, Context->getUndef(EI.getType()));
+      return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
     
     // This instruction only demands the single element from the input vector.
     // If the input vector has a single use, simplify it based on this use
@@ -12411,42 +12271,27 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
   }
   
   if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) {
-    if (I->hasOneUse()) {
-      // Push extractelement into predecessor operation if legal and
-      // profitable to do so
-      if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
-        bool isConstantElt = isa<ConstantInt>(EI.getOperand(1));
-        if (CheapToScalarize(BO, isConstantElt)) {
-          ExtractElementInst *newEI0 = 
-            new ExtractElementInst(BO->getOperand(0), EI.getOperand(1),
-                                   EI.getName()+".lhs");
-          ExtractElementInst *newEI1 =
-            new ExtractElementInst(BO->getOperand(1), EI.getOperand(1),
-                                   EI.getName()+".rhs");
-          InsertNewInstBefore(newEI0, EI);
-          InsertNewInstBefore(newEI1, EI);
-          return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1);
-        }
-      } else if (isa<LoadInst>(I)) {
-        unsigned AS = 
-          cast<PointerType>(I->getOperand(0)->getType())->getAddressSpace();
-        Value *Ptr = InsertBitCastBefore(I->getOperand(0),
-                                  Context->getPointerType(EI.getType(), AS),EI);
-        GetElementPtrInst *GEP =
-          GetElementPtrInst::Create(Ptr, EI.getOperand(1), I->getName()+".gep");
-        InsertNewInstBefore(GEP, EI);
-        return new LoadInst(GEP);
-      }
-    }
-    if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) {
+    // Push extractelement into predecessor operation if legal and
+    // profitable to do so
+    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
+      if (I->hasOneUse() &&
+          CheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) {
+        Value *newEI0 =
+          Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1),
+                                        EI.getName()+".lhs");
+        Value *newEI1 =
+          Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1),
+                                        EI.getName()+".rhs");
+        return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1);
+      }
+    } else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) {
       // Extracting the inserted element?
       if (IE->getOperand(2) == EI.getOperand(1))
         return ReplaceInstUsesWith(EI, IE->getOperand(1));
       // If the inserted and extracted elements are constants, they must not
       // be the same value, extract from the pre-inserted value instead.
-      if (isa<Constant>(IE->getOperand(2)) &&
-          isa<Constant>(EI.getOperand(1))) {
-        AddUsesToWorkList(EI);
+      if (isa<Constant>(IE->getOperand(2)) && isa<Constant>(EI.getOperand(1))) {
+        Worklist.AddValue(EI.getOperand(0));
         EI.setOperand(0, IE->getOperand(0));
         return &EI;
       }
@@ -12465,11 +12310,14 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
           SrcIdx -= LHSWidth;
           Src = SVI->getOperand(1);
         } else {
-          return ReplaceInstUsesWith(EI, Context->getUndef(EI.getType()));
+          return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
         }
-        return new ExtractElementInst(Src, SrcIdx);
+        return ExtractElementInst::Create(Src,
+                         ConstantInt::get(Type::getInt32Ty(*Context), SrcIdx,
+                                          false));
       }
     }
+    // FIXME: Canonicalize extractelement(bitcast) -> bitcast(extractelement)
   }
   return 0;
 }
@@ -12479,21 +12327,21 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
 /// Otherwise, return false.
 static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
                                          std::vector<Constant*> &Mask,
-                                         LLVMContext* Context) {
+                                         LLVMContext *Context) {
   assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() &&
          "Invalid CollectSingleShuffleElements");
   unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
 
   if (isa<UndefValue>(V)) {
-    Mask.assign(NumElts, Context->getUndef(Type::Int32Ty));
+    Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(*Context)));
     return true;
   } else if (V == LHS) {
     for (unsigned i = 0; i != NumElts; ++i)
-      Mask.push_back(Context->getConstantInt(Type::Int32Ty, i));
+      Mask.push_back(ConstantInt::get(Type::getInt32Ty(*Context), i));
     return true;
   } else if (V == RHS) {
     for (unsigned i = 0; i != NumElts; ++i)
-      Mask.push_back(Context->getConstantInt(Type::Int32Ty, i+NumElts));
+      Mask.push_back(ConstantInt::get(Type::getInt32Ty(*Context), i+NumElts));
     return true;
   } else if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
     // If this is an insert of an extract from some other vector, include it.
@@ -12510,7 +12358,7 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
       // transitively ok.
       if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask, Context)) {
         // If so, update the mask to reflect the inserted undef.
-        Mask[InsertedIdx] = Context->getUndef(Type::Int32Ty);
+        Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(*Context));
         return true;
       }      
     } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
@@ -12527,11 +12375,11 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
             // If so, update the mask to reflect the inserted value.
             if (EI->getOperand(0) == LHS) {
               Mask[InsertedIdx % NumElts] = 
-                 Context->getConstantInt(Type::Int32Ty, ExtractedIdx);
+                 ConstantInt::get(Type::getInt32Ty(*Context), ExtractedIdx);
             } else {
               assert(EI->getOperand(0) == RHS);
               Mask[InsertedIdx % NumElts] = 
-                Context->getConstantInt(Type::Int32Ty, ExtractedIdx+NumElts);
+                ConstantInt::get(Type::getInt32Ty(*Context), ExtractedIdx+NumElts);
               
             }
             return true;
@@ -12549,17 +12397,17 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
 /// RHS of the shuffle instruction, if it is not null.  Return a shuffle mask
 /// that computes V and the LHS value of the shuffle.
 static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask,
-                                     Value *&RHS, LLVMContext* Context) {
+                                     Value *&RHS, LLVMContext *Context) {
   assert(isa<VectorType>(V->getType()) && 
          (RHS == 0 || V->getType() == RHS->getType()) &&
          "Invalid shuffle!");
   unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
 
   if (isa<UndefValue>(V)) {
-    Mask.assign(NumElts, Context->getUndef(Type::Int32Ty));
+    Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(*Context)));
     return V;
   } else if (isa<ConstantAggregateZero>(V)) {
-    Mask.assign(NumElts, Context->getConstantInt(Type::Int32Ty, 0));
+    Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(*Context), 0));
     return V;
   } else if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
     // If this is an insert of an extract from some other vector, include it.
@@ -12580,7 +12428,7 @@ static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask,
           RHS = EI->getOperand(0);
           Value *V = CollectShuffleElements(VecOp, Mask, RHS, Context);
           Mask[InsertedIdx % NumElts] = 
-            Context->getConstantInt(Type::Int32Ty, NumElts+ExtractedIdx);
+            ConstantInt::get(Type::getInt32Ty(*Context), NumElts+ExtractedIdx);
           return V;
         }
         
@@ -12590,7 +12438,7 @@ static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask,
           // Everything but the extracted element is replaced with the RHS.
           for (unsigned i = 0; i != NumElts; ++i) {
             if (i != InsertedIdx)
-              Mask[i] = Context->getConstantInt(Type::Int32Ty, NumElts+i);
+              Mask[i] = ConstantInt::get(Type::getInt32Ty(*Context), NumElts+i);
           }
           return V;
         }
@@ -12608,7 +12456,7 @@ static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask,
   
   // Otherwise, can't do anything fancy.  Return an identity vector.
   for (unsigned i = 0; i != NumElts; ++i)
-    Mask.push_back(Context->getConstantInt(Type::Int32Ty, i));
+    Mask.push_back(ConstantInt::get(Type::getInt32Ty(*Context), i));
   return V;
 }
 
@@ -12635,45 +12483,23 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
         return ReplaceInstUsesWith(IE, VecOp);
       
       if (InsertedIdx >= NumVectorElts)  // Out of range insert.
-        return ReplaceInstUsesWith(IE, Context->getUndef(IE.getType()));
+        return ReplaceInstUsesWith(IE, UndefValue::get(IE.getType()));
       
       // If we are extracting a value from a vector, then inserting it right
       // back into the same place, just use the input vector.
       if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx)
         return ReplaceInstUsesWith(IE, VecOp);      
       
-      // We could theoretically do this for ANY input.  However, doing so could
-      // turn chains of insertelement instructions into a chain of shufflevector
-      // instructions, and right now we do not merge shufflevectors.  As such,
-      // only do this in a situation where it is clear that there is benefit.
-      if (isa<UndefValue>(VecOp) || isa<ConstantAggregateZero>(VecOp)) {
-        // Turn this into shuffle(EIOp0, VecOp, Mask).  The result has all of
-        // the values of VecOp, except then one read from EIOp0.
-        // Build a new shuffle mask.
-        std::vector<Constant*> Mask;
-        if (isa<UndefValue>(VecOp))
-          Mask.assign(NumVectorElts, Context->getUndef(Type::Int32Ty));
-        else {
-          assert(isa<ConstantAggregateZero>(VecOp) && "Unknown thing");
-          Mask.assign(NumVectorElts, Context->getConstantInt(Type::Int32Ty,
-                                                       NumVectorElts));
-        } 
-        Mask[InsertedIdx] = 
-                           Context->getConstantInt(Type::Int32Ty, ExtractedIdx);
-        return new ShuffleVectorInst(EI->getOperand(0), VecOp,
-                                     Context->getConstantVector(Mask));
-      }
-      
       // If this insertelement isn't used by some other insertelement, turn it
       // (and any insertelements it points to), into one big shuffle.
       if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.use_back())) {
         std::vector<Constant*> Mask;
         Value *RHS = 0;
         Value *LHS = CollectShuffleElements(&IE, Mask, RHS, Context);
-        if (RHS == 0) RHS = Context->getUndef(LHS->getType());
+        if (RHS == 0) RHS = UndefValue::get(LHS->getType());
         // We now have a shuffle of LHS, RHS, Mask.
         return new ShuffleVectorInst(LHS, RHS,
-                                     Context->getConstantVector(Mask));
+                                     ConstantVector::get(Mask));
       }
     }
   }
@@ -12697,7 +12523,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
 
   // Undefined shuffle mask -> undefined value.
   if (isa<UndefValue>(SVI.getOperand(2)))
-    return ReplaceInstUsesWith(SVI, Context->getUndef(SVI.getType()));
+    return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType()));
 
   unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements();
 
@@ -12724,21 +12550,21 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
     std::vector<Constant*> Elts;
     for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
       if (Mask[i] >= 2*e)
-        Elts.push_back(Context->getUndef(Type::Int32Ty));
+        Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context)));
       else {
         if ((Mask[i] >= e && isa<UndefValue>(RHS)) ||
             (Mask[i] <  e && isa<UndefValue>(LHS))) {
           Mask[i] = 2*e;     // Turn into undef.
-          Elts.push_back(Context->getUndef(Type::Int32Ty));
+          Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context)));
         } else {
           Mask[i] = Mask[i] % e;  // Force to LHS.
-          Elts.push_back(Context->getConstantInt(Type::Int32Ty, Mask[i]));
+          Elts.push_back(ConstantInt::get(Type::getInt32Ty(*Context), Mask[i]));
         }
       }
     }
     SVI.setOperand(0, SVI.getOperand(1));
-    SVI.setOperand(1, Context->getUndef(RHS->getType()));
-    SVI.setOperand(2, Context->getConstantVector(Elts));
+    SVI.setOperand(1, UndefValue::get(RHS->getType()));
+    SVI.setOperand(2, ConstantVector::get(Elts));
     LHS = SVI.getOperand(0);
     RHS = SVI.getOperand(1);
     MadeChange = true;
@@ -12788,14 +12614,14 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
         std::vector<Constant*> Elts;
         for (unsigned i = 0, e = NewMask.size(); i != e; ++i) {
           if (NewMask[i] >= LHSInNElts*2) {
-            Elts.push_back(Context->getUndef(Type::Int32Ty));
+            Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context)));
           } else {
-            Elts.push_back(Context->getConstantInt(Type::Int32Ty, NewMask[i]));
+            Elts.push_back(ConstantInt::get(Type::getInt32Ty(*Context), NewMask[i]));
           }
         }
         return new ShuffleVectorInst(LHSSVI->getOperand(0),
                                      LHSSVI->getOperand(1),
-                                     Context->getConstantVector(Elts));
+                                     ConstantVector::get(Elts));
       }
     }
   }
@@ -12855,6 +12681,9 @@ static void AddReachableCodeToWorklist(BasicBlock *BB,
                                        const TargetData *TD) {
   SmallVector<BasicBlock*, 256> Worklist;
   Worklist.push_back(BB);
+  
+  std::vector<Instruction*> InstrsForInstCombineWorklist;
+  InstrsForInstCombineWorklist.reserve(128);
 
   while (!Worklist.empty()) {
     BB = Worklist.back();
@@ -12863,44 +12692,28 @@ static void AddReachableCodeToWorklist(BasicBlock *BB,
     // We have now visited this block!  If we've already been here, ignore it.
     if (!Visited.insert(BB)) continue;
 
-    DbgInfoIntrinsic *DBI_Prev = NULL;
     for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
       Instruction *Inst = BBI++;
       
       // DCE instruction if trivially dead.
       if (isInstructionTriviallyDead(Inst)) {
         ++NumDeadInst;
-        DOUT << "IC: DCE: " << *Inst;
+        DEBUG(errs() << "IC: DCE: " << *Inst << '\n');
         Inst->eraseFromParent();
         continue;
       }
       
       // ConstantProp instruction if trivially constant.
-      if (Constant *C = ConstantFoldInstruction(Inst, TD)) {
-        DOUT << "IC: ConstFold to: " << *C << " from: " << *Inst;
+      if (Constant *C = ConstantFoldInstruction(Inst, BB->getContext(), TD)) {
+        DEBUG(errs() << "IC: ConstFold to: " << *C << " from: "
+                     << *Inst << '\n');
         Inst->replaceAllUsesWith(C);
         ++NumConstProp;
         Inst->eraseFromParent();
         continue;
       }
-     
-      // If there are two consecutive llvm.dbg.stoppoint calls then
-      // it is likely that the optimizer deleted code in between these
-      // two intrinsics. 
-      DbgInfoIntrinsic *DBI_Next = dyn_cast<DbgInfoIntrinsic>(Inst);
-      if (DBI_Next) {
-        if (DBI_Prev
-            && DBI_Prev->getIntrinsicID() == llvm::Intrinsic::dbg_stoppoint
-            && DBI_Next->getIntrinsicID() == llvm::Intrinsic::dbg_stoppoint) {
-          IC.RemoveFromWorkList(DBI_Prev);
-          DBI_Prev->eraseFromParent();
-        }
-        DBI_Prev = DBI_Next;
-      } else {
-        DBI_Prev = 0;
-      }
 
-      IC.AddToWorkList(Inst);
+      InstrsForInstCombineWorklist.push_back(Inst);
     }
 
     // Recursively visit successors.  If this is a branch or switch on a
@@ -12932,14 +12745,22 @@ static void AddReachableCodeToWorklist(BasicBlock *BB,
     for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
       Worklist.push_back(TI->getSuccessor(i));
   }
+  
+  // Once we've found all of the instructions to add to instcombine's worklist,
+  // add them in reverse order.  This way instcombine will visit from the top
+  // of the function down.  This jives well with the way that it adds all uses
+  // of instructions to the worklist after doing a transformation, thus avoiding
+  // some N^2 behavior in pathological cases.
+  IC.Worklist.AddInitialGroup(&InstrsForInstCombineWorklist[0],
+                              InstrsForInstCombineWorklist.size());
 }
 
 bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
-  bool Changed = false;
-  TD = &getAnalysis<TargetData>();
+  MadeIRChange = false;
+  TD = getAnalysisIfAvailable<TargetData>();
   
-  DEBUG(DOUT << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
-             << F.getNameStr() << "\n");
+  DEBUG(errs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
+        << F.getNameStr() << "\n");
 
   {
     // Do a depth-first traversal of the function, populate the worklist with
@@ -12957,71 +12778,73 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
         while (Term != BB->begin()) {   // Remove instrs bottom-up
           BasicBlock::iterator I = Term; --I;
 
-          DOUT << "IC: DCE: " << *I;
+          DEBUG(errs() << "IC: DCE: " << *I << '\n');
           // A debug intrinsic shouldn't force another iteration if we weren't
           // going to do one without it.
           if (!isa<DbgInfoIntrinsic>(I)) {
             ++NumDeadInst;
-            Changed = true;
+            MadeIRChange = true;
           }
-          if (!I->use_empty())
-            I->replaceAllUsesWith(Context->getUndef(I->getType()));
+
+
+          // If I is not void type then replaceAllUsesWith undef.
+          // This allows ValueHandlers and custom metadata to adjust itself.
+          if (!I->getType()->isVoidTy())
+            I->replaceAllUsesWith(UndefValue::get(I->getType()));
           I->eraseFromParent();
         }
       }
   }
 
-  while (!Worklist.empty()) {
-    Instruction *I = RemoveOneFromWorkList();
+  while (!Worklist.isEmpty()) {
+    Instruction *I = Worklist.RemoveOne();
     if (I == 0) continue;  // skip null values.
 
     // Check to see if we can DCE the instruction.
     if (isInstructionTriviallyDead(I)) {
-      // Add operands to the worklist.
-      if (I->getNumOperands() < 4)
-        AddUsesToWorkList(*I);
+      DEBUG(errs() << "IC: DCE: " << *I << '\n');
+      EraseInstFromFunction(*I);
       ++NumDeadInst;
-
-      DOUT << "IC: DCE: " << *I;
-
-      I->eraseFromParent();
-      RemoveFromWorkList(I);
-      Changed = true;
+      MadeIRChange = true;
       continue;
     }
 
     // Instruction isn't dead, see if we can constant propagate it.
-    if (Constant *C = ConstantFoldInstruction(I, TD)) {
-      DOUT << "IC: ConstFold to: " << *C << " from: " << *I;
+    if (Constant *C = ConstantFoldInstruction(I, F.getContext(), TD)) {
+      DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
 
       // Add operands to the worklist.
-      AddUsesToWorkList(*I);
       ReplaceInstUsesWith(*I, C);
-
       ++NumConstProp;
-      I->eraseFromParent();
-      RemoveFromWorkList(I);
-      Changed = true;
+      EraseInstFromFunction(*I);
+      MadeIRChange = true;
       continue;
     }
 
-    if (TD &&
-        (I->getType()->getTypeID() == Type::VoidTyID ||
-         I->isTrapping())) {
+    if (TD) {
       // See if we can constant fold its operands.
       for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i)
         if (ConstantExpr *CE = dyn_cast<ConstantExpr>(i))
-          if (Constant *NewC = ConstantFoldConstantExpression(CE, TD))
+          if (Constant *NewC = ConstantFoldConstantExpression(CE,   
+                                  F.getContext(), TD))
             if (NewC != CE) {
-              i->set(NewC);
-              Changed = true;
+              *i = NewC;
+              MadeIRChange = true;
             }
     }
 
     // See if we can trivially sink this instruction to a successor basic block.
     if (I->hasOneUse()) {
       BasicBlock *BB = I->getParent();
-      BasicBlock *UserParent = cast<Instruction>(I->use_back())->getParent();
+      Instruction *UserInst = cast<Instruction>(I->use_back());
+      BasicBlock *UserParent;
+      
+      // Get the block the use occurs in.
+      if (PHINode *PN = dyn_cast<PHINode>(UserInst))
+        UserParent = PN->getIncomingBlock(I->use_begin().getUse());
+      else
+        UserParent = UserInst->getParent();
+      
       if (UserParent != BB) {
         bool UserIsSuccessor = false;
         // See if the user is one of our successors.
@@ -13034,31 +12857,34 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
         // If the user is one of our immediate successors, and if that successor
         // only has us as a predecessors (we'd have to split the critical edge
         // otherwise), we can keep going.
-        if (UserIsSuccessor && !isa<PHINode>(I->use_back()) &&
-            next(pred_begin(UserParent)) == pred_end(UserParent))
+        if (UserIsSuccessor && UserParent->getSinglePredecessor())
           // Okay, the CFG is simple enough, try to sink this instruction.
-          Changed |= TryToSinkInstruction(I, UserParent);
+          MadeIRChange |= TryToSinkInstruction(I, UserParent);
       }
     }
 
-    // Now that we have an instruction, try combining it to simplify it...
+    // Now that we have an instruction, try combining it to simplify it.
+    Builder->SetInsertPoint(I->getParent(), I);
+    
 #ifndef NDEBUG
     std::string OrigI;
 #endif
-    DEBUG(std::ostringstream SS; I->print(SS); OrigI = SS.str(););
+    DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str(););
+    DEBUG(errs() << "IC: Visiting: " << OrigI << '\n');
+
     if (Instruction *Result = visit(*I)) {
       ++NumCombined;
       // Should we replace the old instruction with a new one?
       if (Result != I) {
-        DOUT << "IC: Old = " << *I
-             << "    New = " << *Result;
+        DEBUG(errs() << "IC: Old = " << *I << '\n'
+                     << "    New = " << *Result << '\n');
 
         // Everything uses the new instruction now.
         I->replaceAllUsesWith(Result);
 
         // Push the new instruction and any users onto the worklist.
-        AddToWorkList(Result);
-        AddUsersToWorkList(*Result);
+        Worklist.Add(Result);
+        Worklist.AddUsersToWorkList(*Result);
 
         // Move the name to the new instruction first.
         Result->takeName(I);
@@ -13073,52 +12899,42 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
 
         InstParent->getInstList().insert(InsertPos, Result);
 
-        // Make sure that we reprocess all operands now that we reduced their
-        // use counts.
-        AddUsesToWorkList(*I);
-
-        // Instructions can end up on the worklist more than once.  Make sure
-        // we do not process an instruction that has been deleted.
-        RemoveFromWorkList(I);
-
-        // Erase the old instruction.
-        InstParent->getInstList().erase(I);
+        EraseInstFromFunction(*I);
       } else {
 #ifndef NDEBUG
-        DOUT << "IC: Mod = " << OrigI
-             << "    New = " << *I;
+        DEBUG(errs() << "IC: Mod = " << OrigI << '\n'
+                     << "    New = " << *I << '\n');
 #endif
 
         // If the instruction was modified, it's possible that it is now dead.
         // if so, remove it.
         if (isInstructionTriviallyDead(I)) {
-          // Make sure we process all operands now that we are reducing their
-          // use counts.
-          AddUsesToWorkList(*I);
-
-          // Instructions may end up in the worklist more than once.  Erase all
-          // occurrences of this instruction.
-          RemoveFromWorkList(I);
-          I->eraseFromParent();
+          EraseInstFromFunction(*I);
         } else {
-          AddToWorkList(I);
-          AddUsersToWorkList(*I);
+          Worklist.Add(I);
+          Worklist.AddUsersToWorkList(*I);
         }
       }
-      Changed = true;
+      MadeIRChange = true;
     }
   }
 
-  assert(WorklistMap.empty() && "Worklist empty, but map not?");
-    
-  // Do an explicit clear, this shrinks the map if needed.
-  WorklistMap.clear();
-  return Changed;
+  Worklist.Zap();
+  return MadeIRChange;
 }
 
 
 bool InstCombiner::runOnFunction(Function &F) {
   MustPreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
+  Context = &F.getContext();
+  
+  
+  /// Builder - This is an IRBuilder that automatically inserts new
+  /// instructions into the worklist when they are created.
+  IRBuilder<true, ConstantFolder, InstCombineIRInserter> 
+    TheBuilder(F.getContext(), ConstantFolder(F.getContext()),
+               InstCombineIRInserter(Worklist));
+  Builder = &TheBuilder;
   
   bool EverMadeChange = false;
 
@@ -13126,6 +12942,8 @@ bool InstCombiner::runOnFunction(Function &F) {
   unsigned Iteration = 0;
   while (DoOneIteration(F, Iteration++))
     EverMadeChange = true;
+  
+  Builder = 0;
   return EverMadeChange;
 }
 
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index dee7bfb..8b11edd 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -19,6 +19,7 @@
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Statistic.h"
@@ -26,13 +27,13 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
 STATISTIC(NumThreads, "Number of jumps threaded");
 STATISTIC(NumFolds,   "Number of terminators folded");
+STATISTIC(NumDupes,   "Number of branch blocks duplicated to eliminate phi");
 
 static cl::opt<unsigned>
 Threshold("jump-threading-threshold", 
@@ -56,7 +57,7 @@ namespace {
   /// In this case, the unconditional branch at the end of the first if can be
   /// revectored to the false side of the second if.
   ///
-  class VISIBILITY_HIDDEN JumpThreading : public FunctionPass {
+  class JumpThreading : public FunctionPass {
     TargetData *TD;
 #ifdef NDEBUG
     SmallPtrSet<BasicBlock*, 16> LoopHeaders;
@@ -68,15 +69,16 @@ namespace {
     JumpThreading() : FunctionPass(&ID) {}
 
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.addRequired<TargetData>();
     }
 
     bool runOnFunction(Function &F);
     void FindLoopHeaders(Function &F);
     
     bool ProcessBlock(BasicBlock *BB);
-    bool ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, BasicBlock *SuccBB,
-                    unsigned JumpThreadCost);
+    bool ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, BasicBlock *SuccBB);
+    bool DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
+                                          BasicBlock *PredBB);
+
     BasicBlock *FactorCommonPHIPreds(PHINode *PN, Value *Val);
     bool ProcessBranchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
     bool ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
@@ -99,8 +101,8 @@ FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); }
 /// runOnFunction - Top level algorithm.
 ///
 bool JumpThreading::runOnFunction(Function &F) {
-  DOUT << "Jump threading on function '" << F.getNameStart() << "'\n";
-  TD = &getAnalysis<TargetData>();
+  DEBUG(errs() << "Jump threading on function '" << F.getName() << "'\n");
+  TD = getAnalysisIfAvailable<TargetData>();
   
   FindLoopHeaders(F);
   
@@ -119,8 +121,8 @@ bool JumpThreading::runOnFunction(Function &F) {
       // edges which simplifies the CFG.
       if (pred_begin(BB) == pred_end(BB) &&
           BB != &BB->getParent()->getEntryBlock()) {
-        DOUT << "  JT: Deleting dead block '" << BB->getNameStart()
-             << "' with terminator: " << *BB->getTerminator();
+        DEBUG(errs() << "  JT: Deleting dead block '" << BB->getName()
+              << "' with terminator: " << *BB->getTerminator() << '\n');
         LoopHeaders.erase(BB);
         DeleteDeadBlock(BB);
         Changed = true;
@@ -134,6 +136,48 @@ bool JumpThreading::runOnFunction(Function &F) {
   return EverChanged;
 }
 
+/// getJumpThreadDuplicationCost - Return the cost of duplicating this block to
+/// thread across it.
+static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) {
+  /// Ignore PHI nodes, these will be flattened when duplication happens.
+  BasicBlock::const_iterator I = BB->getFirstNonPHI();
+  
+  // Sum up the cost of each instruction until we get to the terminator.  Don't
+  // include the terminator because the copy won't include it.
+  unsigned Size = 0;
+  for (; !isa<TerminatorInst>(I); ++I) {
+    // Debugger intrinsics don't incur code size.
+    if (isa<DbgInfoIntrinsic>(I)) continue;
+    
+    // If this is a pointer->pointer bitcast, it is free.
+    if (isa<BitCastInst>(I) && isa<PointerType>(I->getType()))
+      continue;
+    
+    // All other instructions count for at least one unit.
+    ++Size;
+    
+    // Calls are more expensive.  If they are non-intrinsic calls, we model them
+    // as having cost of 4.  If they are a non-vector intrinsic, we model them
+    // as having cost of 2 total, and if they are a vector intrinsic, we model
+    // them as having cost 1.
+    if (const CallInst *CI = dyn_cast<CallInst>(I)) {
+      if (!isa<IntrinsicInst>(CI))
+        Size += 3;
+      else if (!isa<VectorType>(CI->getType()))
+        Size += 1;
+    }
+  }
+  
+  // Threading through a switch statement is particularly profitable.  If this
+  // block ends in a switch, decrease its cost to make it more likely to happen.
+  if (isa<SwitchInst>(I))
+    Size = Size > 6 ? Size-6 : 0;
+  
+  return Size;
+}
+
+
+
 /// FindLoopHeaders - We do not want jump threading to turn proper loop
 /// structures into irreducible loops.  Doing this breaks up the loop nesting
 /// hierarchy and pessimizes later transformations.  To prevent this from
@@ -173,52 +217,34 @@ BasicBlock *JumpThreading::FactorCommonPHIPreds(PHINode *PN, Value *Val) {
   if (CommonPreds.size() == 1)
     return CommonPreds[0];
     
-  DOUT << "  Factoring out " << CommonPreds.size()
-       << " common predecessors.\n";
+  DEBUG(errs() << "  Factoring out " << CommonPreds.size()
+        << " common predecessors.\n");
   return SplitBlockPredecessors(PN->getParent(),
                                 &CommonPreds[0], CommonPreds.size(),
                                 ".thr_comm", this);
 }
   
 
-/// getJumpThreadDuplicationCost - Return the cost of duplicating this block to
-/// thread across it.
-static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) {
-  /// Ignore PHI nodes, these will be flattened when duplication happens.
-  BasicBlock::const_iterator I = BB->getFirstNonPHI();
-
-  // Sum up the cost of each instruction until we get to the terminator.  Don't
-  // include the terminator because the copy won't include it.
-  unsigned Size = 0;
-  for (; !isa<TerminatorInst>(I); ++I) {
-    // Debugger intrinsics don't incur code size.
-    if (isa<DbgInfoIntrinsic>(I)) continue;
-    
-    // If this is a pointer->pointer bitcast, it is free.
-    if (isa<BitCastInst>(I) && isa<PointerType>(I->getType()))
-      continue;
-    
-    // All other instructions count for at least one unit.
-    ++Size;
-    
-    // Calls are more expensive.  If they are non-intrinsic calls, we model them
-    // as having cost of 4.  If they are a non-vector intrinsic, we model them
-    // as having cost of 2 total, and if they are a vector intrinsic, we model
-    // them as having cost 1.
-    if (const CallInst *CI = dyn_cast<CallInst>(I)) {
-      if (!isa<IntrinsicInst>(CI))
-        Size += 3;
-      else if (!isa<VectorType>(CI->getType()))
-        Size += 1;
-    }
+/// GetBestDestForBranchOnUndef - If we determine that the specified block ends
+/// in an undefined jump, decide which block is best to revector to.
+///
+/// Since we can pick an arbitrary destination, we pick the successor with the
+/// fewest predecessors.  This should reduce the in-degree of the others.
+///
+static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) {
+  TerminatorInst *BBTerm = BB->getTerminator();
+  unsigned MinSucc = 0;
+  BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc);
+  // Compute the successor with the minimum number of predecessors.
+  unsigned MinNumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB));
+  for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) {
+    TestBB = BBTerm->getSuccessor(i);
+    unsigned NumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB));
+    if (NumPreds < MinNumPreds)
+      MinSucc = i;
   }
   
-  // Threading through a switch statement is particularly profitable.  If this
-  // block ends in a switch, decrease its cost to make it more likely to happen.
-  if (isa<SwitchInst>(I))
-    Size = Size > 6 ? Size-6 : 0;
-  
-  return Size;
+  return MinSucc;
 }
 
 /// ProcessBlock - If there are any predecessors whose control can be threaded
@@ -262,39 +288,28 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
   // terminator to an unconditional branch.  This can occur due to threading in
   // other blocks.
   if (isa<ConstantInt>(Condition)) {
-    DOUT << "  In block '" << BB->getNameStart()
-         << "' folding terminator: " << *BB->getTerminator();
+    DEBUG(errs() << "  In block '" << BB->getName()
+          << "' folding terminator: " << *BB->getTerminator() << '\n');
     ++NumFolds;
     ConstantFoldTerminator(BB);
     return true;
   }
   
   // If the terminator is branching on an undef, we can pick any of the
-  // successors to branch to.  Since this is arbitrary, we pick the successor
-  // with the fewest predecessors.  This should reduce the in-degree of the
-  // others.
+  // successors to branch to.  Let GetBestDestForJumpOnUndef decide.
   if (isa<UndefValue>(Condition)) {
-    TerminatorInst *BBTerm = BB->getTerminator();
-    unsigned MinSucc = 0;
-    BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc);
-    // Compute the successor with the minimum number of predecessors.
-    unsigned MinNumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB));
-    for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) {
-      TestBB = BBTerm->getSuccessor(i);
-      unsigned NumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB));
-      if (NumPreds < MinNumPreds)
-        MinSucc = i;
-    }
+    unsigned BestSucc = GetBestDestForJumpOnUndef(BB);
     
     // Fold the branch/switch.
+    TerminatorInst *BBTerm = BB->getTerminator();
     for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) {
-      if (i == MinSucc) continue;
+      if (i == BestSucc) continue;
       BBTerm->getSuccessor(i)->removePredecessor(BB);
     }
     
-    DOUT << "  In block '" << BB->getNameStart()
-         << "' folding undef terminator: " << *BBTerm;
-    BranchInst::Create(BBTerm->getSuccessor(MinSucc), BBTerm);
+    DEBUG(errs() << "  In block '" << BB->getName()
+          << "' folding undef terminator: " << *BBTerm << '\n');
+    BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm);
     BBTerm->eraseFromParent();
     return true;
   }
@@ -419,8 +434,8 @@ bool JumpThreading::ProcessBranchOnDuplicateCond(BasicBlock *PredBB,
   else if (PredBI->getSuccessor(0) != BB)
     BranchDir = false;
   else {
-    DOUT << "  In block '" << PredBB->getNameStart()
-         << "' folding terminator: " << *PredBB->getTerminator();
+    DEBUG(errs() << "  In block '" << PredBB->getName()
+          << "' folding terminator: " << *PredBB->getTerminator() << '\n');
     ++NumFolds;
     ConstantFoldTerminator(PredBB);
     return true;
@@ -431,29 +446,24 @@ bool JumpThreading::ProcessBranchOnDuplicateCond(BasicBlock *PredBB,
   // If the dest block has one predecessor, just fix the branch condition to a
   // constant and fold it.
   if (BB->getSinglePredecessor()) {
-    DOUT << "  In block '" << BB->getNameStart()
-         << "' folding condition to '" << BranchDir << "': "
-         << *BB->getTerminator();
+    DEBUG(errs() << "  In block '" << BB->getName()
+          << "' folding condition to '" << BranchDir << "': "
+          << *BB->getTerminator() << '\n');
     ++NumFolds;
-    DestBI->setCondition(Context->getConstantInt(Type::Int1Ty, BranchDir));
+    Value *OldCond = DestBI->getCondition();
+    DestBI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()),
+                                          BranchDir));
     ConstantFoldTerminator(BB);
+    RecursivelyDeleteTriviallyDeadInstructions(OldCond);
     return true;
   }
-  
-  // Otherwise we need to thread from PredBB to DestBB's successor which
-  // involves code duplication.  Check to see if it is worth it.
-  unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB);
-  if (JumpThreadCost > Threshold) {
-    DOUT << "  Not threading BB '" << BB->getNameStart()
-         << "' - Cost is too high: " << JumpThreadCost << "\n";
-    return false;
-  }
+ 
   
   // Next, figure out which successor we are threading to.
   BasicBlock *SuccBB = DestBI->getSuccessor(!BranchDir);
   
   // Ok, try to thread it!
-  return ThreadEdge(BB, PredBB, SuccBB, JumpThreadCost);
+  return ThreadEdge(BB, PredBB, SuccBB);
 }
 
 /// ProcessSwitchOnDuplicateCond - We found a block and a predecessor of that
@@ -472,7 +482,6 @@ bool JumpThreading::ProcessSwitchOnDuplicateCond(BasicBlock *PredBB,
   if (PredBB == DestBB)
     return false;
   
-  
   SwitchInst *PredSI = cast<SwitchInst>(PredBB->getTerminator());
   SwitchInst *DestSI = cast<SwitchInst>(DestBB->getTerminator());
 
@@ -508,8 +517,8 @@ bool JumpThreading::ProcessSwitchOnDuplicateCond(BasicBlock *PredBB,
 
       // Otherwise, we're safe to make the change.  Make sure that the edge from
       // DestSI to DestSucc is not critical and has no PHI nodes.
-      DOUT << "FORWARDING EDGE " << *DestVal << "   FROM: " << *PredSI;
-      DOUT << "THROUGH: " << *DestSI;
+      DEBUG(errs() << "FORWARDING EDGE " << *DestVal << "   FROM: " << *PredSI);
+      DEBUG(errs() << "THROUGH: " << *DestSI);
 
       // If the destination has PHI nodes, just split the edge for updating
       // simplicity.
@@ -564,7 +573,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
     
     // If the returned value is the load itself, replace with an undef. This can
     // only happen in dead loops.
-    if (AvailableVal == LI) AvailableVal = Context->getUndef(LI->getType());
+    if (AvailableVal == LI) AvailableVal = UndefValue::get(LI->getType());
     LI->replaceAllUsesWith(AvailableVal);
     LI->eraseFromParent();
     return true;
@@ -685,49 +694,74 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
 }
 
 
-/// ProcessJumpOnPHI - We have a conditional branch of switch on a PHI node in
+/// ProcessJumpOnPHI - We have a conditional branch or switch on a PHI node in
 /// the current block.  See if there are any simplifications we can do based on
 /// inputs to the phi node.
 /// 
 bool JumpThreading::ProcessJumpOnPHI(PHINode *PN) {
-  // See if the phi node has any constant values.  If so, we can determine where
-  // the corresponding predecessor will branch.
-  ConstantInt *PredCst = 0;
-  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
-    if ((PredCst = dyn_cast<ConstantInt>(PN->getIncomingValue(i))))
-      break;
-  
-  // If no incoming value has a constant, we don't know the destination of any
-  // predecessors.
-  if (PredCst == 0)
-    return false;
-  
-  // See if the cost of duplicating this block is low enough.
   BasicBlock *BB = PN->getParent();
-  unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB);
-  if (JumpThreadCost > Threshold) {
-    DOUT << "  Not threading BB '" << BB->getNameStart()
-         << "' - Cost is too high: " << JumpThreadCost << "\n";
-    return false;
+  
+  // See if the phi node has any constant integer or undef values.  If so, we
+  // can determine where the corresponding predecessor will branch.
+  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+    Value *PredVal = PN->getIncomingValue(i);
+    
+    // Check to see if this input is a constant integer.  If so, the direction
+    // of the branch is predictable.
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(PredVal)) {
+      // Merge any common predecessors that will act the same.
+      BasicBlock *PredBB = FactorCommonPHIPreds(PN, CI);
+      
+      BasicBlock *SuccBB;
+      if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
+        SuccBB = BI->getSuccessor(CI->isZero());
+      else {
+        SwitchInst *SI = cast<SwitchInst>(BB->getTerminator());
+        SuccBB = SI->getSuccessor(SI->findCaseValue(CI));
+      }
+      
+      // Ok, try to thread it!
+      return ThreadEdge(BB, PredBB, SuccBB);
+    }
+    
+    // If the input is an undef, then it doesn't matter which way it will go.
+    // Pick an arbitrary dest and thread the edge.
+    if (UndefValue *UV = dyn_cast<UndefValue>(PredVal)) {
+      // Merge any common predecessors that will act the same.
+      BasicBlock *PredBB = FactorCommonPHIPreds(PN, UV);
+      BasicBlock *SuccBB =
+        BB->getTerminator()->getSuccessor(GetBestDestForJumpOnUndef(BB));
+      
+      // Ok, try to thread it!
+      return ThreadEdge(BB, PredBB, SuccBB);
+    }
   }
   
-  // If so, we can actually do this threading.  Merge any common predecessors
-  // that will act the same.
-  BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredCst);
+  // If the incoming values are all variables, we don't know the destination of
+  // any predecessors.  However, if any of the predecessor blocks end in an
+  // unconditional branch, we can *duplicate* the jump into that block in order
+  // to further encourage jump threading and to eliminate cases where we have
+  // branch on a phi of an icmp (branch on icmp is much better).
+
+  // We don't want to do this tranformation for switches, because we don't
+  // really want to duplicate a switch.
+  if (isa<SwitchInst>(BB->getTerminator()))
+    return false;
   
-  // Next, figure out which successor we are threading to.
-  BasicBlock *SuccBB;
-  if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
-    SuccBB = BI->getSuccessor(PredCst == Context->getConstantIntFalse());
-  else {
-    SwitchInst *SI = cast<SwitchInst>(BB->getTerminator());
-    SuccBB = SI->getSuccessor(SI->findCaseValue(PredCst));
+  // Look for unconditional branch predecessors.
+  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+    BasicBlock *PredBB = PN->getIncomingBlock(i);
+    if (BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator()))
+      if (PredBr->isUnconditional() &&
+          // Try to duplicate BB into PredBB.
+          DuplicateCondBranchOnPHIIntoPred(BB, PredBB))
+        return true;
   }
-  
-  // Ok, try to thread it!
-  return ThreadEdge(BB, PredBB, SuccBB, JumpThreadCost);
+
+  return false;
 }
 
+
 /// ProcessJumpOnLogicalPHI - PN's basic block contains a conditional branch
 /// whose condition is an AND/OR where one side is PN.  If PN has constant
 /// operands that permit us to evaluate the condition for some operand, thread
@@ -756,7 +790,8 @@ bool JumpThreading::ProcessBranchOnLogical(Value *V, BasicBlock *BB,
   // We can only do the simplification for phi nodes of 'false' with AND or
   // 'true' with OR.  See if we have any entries in the phi for this.
   unsigned PredNo = ~0U;
-  ConstantInt *PredCst = Context->getConstantInt(Type::Int1Ty, !isAnd);
+  ConstantInt *PredCst = ConstantInt::get(Type::getInt1Ty(BB->getContext()),
+                                          !isAnd);
   for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
     if (PN->getIncomingValue(i) == PredCst) {
       PredNo = i;
@@ -768,14 +803,6 @@ bool JumpThreading::ProcessBranchOnLogical(Value *V, BasicBlock *BB,
   if (PredNo == ~0U)
     return false;
   
-  // See if the cost of duplicating this block is low enough.
-  unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB);
-  if (JumpThreadCost > Threshold) {
-    DOUT << "  Not threading BB '" << BB->getNameStart()
-         << "' - Cost is too high: " << JumpThreadCost << "\n";
-    return false;
-  }
-
   // If so, we can actually do this threading.  Merge any common predecessors
   // that will act the same.
   BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredCst);
@@ -787,7 +814,7 @@ bool JumpThreading::ProcessBranchOnLogical(Value *V, BasicBlock *BB,
   BasicBlock *SuccBB = BB->getTerminator()->getSuccessor(isAnd);
   
   // Ok, try to thread it!
-  return ThreadEdge(BB, PredBB, SuccBB, JumpThreadCost);
+  return ThreadEdge(BB, PredBB, SuccBB);
 }
 
 /// GetResultOfComparison - Given an icmp/fcmp predicate and the left and right
@@ -795,15 +822,15 @@ bool JumpThreading::ProcessBranchOnLogical(Value *V, BasicBlock *BB,
 /// result can not be determined, a null pointer is returned.
 static Constant *GetResultOfComparison(CmpInst::Predicate pred,
                                        Value *LHS, Value *RHS,
-                                       LLVMContext* Context) {
+                                       LLVMContext &Context) {
   if (Constant *CLHS = dyn_cast<Constant>(LHS))
     if (Constant *CRHS = dyn_cast<Constant>(RHS))
-      return Context->getConstantExprCompare(pred, CLHS, CRHS);
+      return ConstantExpr::getCompare(pred, CLHS, CRHS);
 
   if (LHS == RHS)
     if (isa<IntegerType>(LHS->getType()) || isa<PointerType>(LHS->getType()))
       return ICmpInst::isTrueWhenEqual(pred) ? 
-                 Context->getConstantIntTrue() : Context->getConstantIntFalse();
+                 ConstantInt::getTrue(Context) : ConstantInt::getFalse(Context);
 
   return 0;
 }
@@ -829,7 +856,7 @@ bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) {
     PredVal = PN->getIncomingValue(i);
     
     Constant *Res = GetResultOfComparison(Cmp->getPredicate(), PredVal,
-                                          RHS, Context);
+                                          RHS, Cmp->getContext());
     if (!Res) {
       PredVal = 0;
       continue;
@@ -854,14 +881,6 @@ bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) {
   if (PredVal == 0)
     return false;
   
-  // See if the cost of duplicating this block is low enough.
-  unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB);
-  if (JumpThreadCost > Threshold) {
-    DOUT << "  Not threading BB '" << BB->getNameStart()
-         << "' - Cost is too high: " << JumpThreadCost << "\n";
-    return false;
-  }
-  
   // If so, we can actually do this threading.  Merge any common predecessors
   // that will act the same.
   BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredVal);
@@ -870,58 +889,77 @@ bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) {
   BasicBlock *SuccBB = BB->getTerminator()->getSuccessor(!TrueDirection);
   
   // Ok, try to thread it!
-  return ThreadEdge(BB, PredBB, SuccBB, JumpThreadCost);
+  return ThreadEdge(BB, PredBB, SuccBB);
 }
 
 
+/// AddPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new
+/// predecessor to the PHIBB block.  If it has PHI nodes, add entries for
+/// NewPred using the entries from OldPred (suitably mapped).
+static void AddPHINodeEntriesForMappedBlock(BasicBlock *PHIBB,
+                                            BasicBlock *OldPred,
+                                            BasicBlock *NewPred,
+                                     DenseMap<Instruction*, Value*> &ValueMap) {
+  for (BasicBlock::iterator PNI = PHIBB->begin();
+       PHINode *PN = dyn_cast<PHINode>(PNI); ++PNI) {
+    // Ok, we have a PHI node.  Figure out what the incoming value was for the
+    // DestBlock.
+    Value *IV = PN->getIncomingValueForBlock(OldPred);
+    
+    // Remap the value if necessary.
+    if (Instruction *Inst = dyn_cast<Instruction>(IV)) {
+      DenseMap<Instruction*, Value*>::iterator I = ValueMap.find(Inst);
+      if (I != ValueMap.end())
+        IV = I->second;
+    }
+    
+    PN->addIncoming(IV, NewPred);
+  }
+}
+
 /// ThreadEdge - We have decided that it is safe and profitable to thread an
 /// edge from PredBB to SuccBB across BB.  Transform the IR to reflect this
 /// change.
 bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, 
-                               BasicBlock *SuccBB, unsigned JumpThreadCost) {
-
+                               BasicBlock *SuccBB) {
   // If threading to the same block as we come from, we would infinite loop.
   if (SuccBB == BB) {
-    DOUT << "  Not threading across BB '" << BB->getNameStart()
-         << "' - would thread to self!\n";
+    DEBUG(errs() << "  Not threading across BB '" << BB->getName()
+          << "' - would thread to self!\n");
     return false;
   }
   
   // If threading this would thread across a loop header, don't thread the edge.
   // See the comments above FindLoopHeaders for justifications and caveats.
   if (LoopHeaders.count(BB)) {
-    DOUT << "  Not threading from '" << PredBB->getNameStart()
-         << "' across loop header BB '" << BB->getNameStart()
-         << "' to dest BB '" << SuccBB->getNameStart()
-         << "' - it might create an irreducible loop!\n";
+    DEBUG(errs() << "  Not threading from '" << PredBB->getName()
+          << "' across loop header BB '" << BB->getName()
+          << "' to dest BB '" << SuccBB->getName()
+          << "' - it might create an irreducible loop!\n");
     return false;
   }
 
-  // And finally, do it!
-  DOUT << "  Threading edge from '" << PredBB->getNameStart() << "' to '"
-       << SuccBB->getNameStart() << "' with cost: " << JumpThreadCost
-       << ", across block:\n    "
-       << *BB << "\n";
-  
-  // Jump Threading can not update SSA properties correctly if the values
-  // defined in the duplicated block are used outside of the block itself.  For
-  // this reason, we spill all values that are used outside of BB to the stack.
-  for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
-    if (!I->isUsedOutsideOfBlock(BB))
-      continue;
-    
-    // We found a use of I outside of BB.  Create a new stack slot to
-    // break this inter-block usage pattern.
-    DemoteRegToStack(*I);
+  unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB);
+  if (JumpThreadCost > Threshold) {
+    DEBUG(errs() << "  Not threading BB '" << BB->getName()
+          << "' - Cost is too high: " << JumpThreadCost << "\n");
+    return false;
   }
- 
+  
+  // And finally, do it!
+  DEBUG(errs() << "  Threading edge from '" << PredBB->getName() << "' to '"
+        << SuccBB->getName() << "' with cost: " << JumpThreadCost
+        << ", across block:\n    "
+        << *BB << "\n");
+  
   // We are going to have to map operands from the original BB block to the new
   // copy of the block 'NewBB'.  If there are PHI nodes in BB, evaluate them to
   // account for entry from PredBB.
   DenseMap<Instruction*, Value*> ValueMapping;
   
-  BasicBlock *NewBB =
-    BasicBlock::Create(BB->getName()+".thread", BB->getParent(), BB);
+  BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), 
+                                         BB->getName()+".thread", 
+                                         BB->getParent(), BB);
   NewBB->moveAfter(PredBB);
   
   BasicBlock::iterator BI = BB->begin();
@@ -932,7 +970,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB,
   // mapping and using it to remap operands in the cloned instructions.
   for (; !isa<TerminatorInst>(BI); ++BI) {
     Instruction *New = BI->clone();
-    New->setName(BI->getNameStart());
+    New->setName(BI->getName());
     NewBB->getInstList().push_back(New);
     ValueMapping[BI] = New;
    
@@ -951,21 +989,48 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB,
   
   // Check to see if SuccBB has PHI nodes. If so, we need to add entries to the
   // PHI nodes for NewBB now.
-  for (BasicBlock::iterator PNI = SuccBB->begin(); isa<PHINode>(PNI); ++PNI) {
-    PHINode *PN = cast<PHINode>(PNI);
-    // Ok, we have a PHI node.  Figure out what the incoming value was for the
-    // DestBlock.
-    Value *IV = PN->getIncomingValueForBlock(BB);
-    
-    // Remap the value if necessary.
-    if (Instruction *Inst = dyn_cast<Instruction>(IV)) {
-      DenseMap<Instruction*, Value*>::iterator I = ValueMapping.find(Inst);
-      if (I != ValueMapping.end())
-        IV = I->second;
+  AddPHINodeEntriesForMappedBlock(SuccBB, BB, NewBB, ValueMapping);
+  
+  // If there were values defined in BB that are used outside the block, then we
+  // now have to update all uses of the value to use either the original value,
+  // the cloned value, or some PHI derived value.  This can require arbitrary
+  // PHI insertion, of which we are prepared to do, clean these up now.
+  SSAUpdater SSAUpdate;
+  SmallVector<Use*, 16> UsesToRename;
+  for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
+    // Scan all uses of this instruction to see if it is used outside of its
+    // block, and if so, record them in UsesToRename.
+    for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
+         ++UI) {
+      Instruction *User = cast<Instruction>(*UI);
+      if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
+        if (UserPN->getIncomingBlock(UI) == BB)
+          continue;
+      } else if (User->getParent() == BB)
+        continue;
+      
+      UsesToRename.push_back(&UI.getUse());
     }
-    PN->addIncoming(IV, NewBB);
+    
+    // If there are no uses outside the block, we're done with this instruction.
+    if (UsesToRename.empty())
+      continue;
+    
+    DEBUG(errs() << "JT: Renaming non-local uses of: " << *I << "\n");
+
+    // We found a use of I outside of BB.  Rename all uses of I that are outside
+    // its block to be uses of the appropriate PHI node etc.  See ValuesInBlocks
+    // with the two values we know.
+    SSAUpdate.Initialize(I);
+    SSAUpdate.AddAvailableValue(BB, I);
+    SSAUpdate.AddAvailableValue(NewBB, ValueMapping[I]);
+    
+    while (!UsesToRename.empty())
+      SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
+    DEBUG(errs() << "\n");
   }
   
+  
   // Ok, NewBB is good to go.  Update the terminator of PredBB to jump to
   // NewBB instead of BB.  This eliminates predecessors from BB, which requires
   // us to simplify any PHI nodes in BB.
@@ -982,7 +1047,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB,
   BI = NewBB->begin();
   for (BasicBlock::iterator E = NewBB->end(); BI != E; ) {
     Instruction *Inst = BI++;
-    if (Constant *C = ConstantFoldInstruction(Inst, TD)) {
+    if (Constant *C = ConstantFoldInstruction(Inst, BB->getContext(), TD)) {
       Inst->replaceAllUsesWith(C);
       Inst->eraseFromParent();
       continue;
@@ -995,3 +1060,120 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB,
   ++NumThreads;
   return true;
 }
+
+/// DuplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch
+/// to BB which contains an i1 PHI node and a conditional branch on that PHI.
+/// If we can duplicate the contents of BB up into PredBB do so now, this
+/// improves the odds that the branch will be on an analyzable instruction like
+/// a compare.
+bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
+                                                     BasicBlock *PredBB) {
+  // If BB is a loop header, then duplicating this block outside the loop would
+  // cause us to transform this into an irreducible loop, don't do this.
+  // See the comments above FindLoopHeaders for justifications and caveats.
+  if (LoopHeaders.count(BB)) {
+    DEBUG(errs() << "  Not duplicating loop header '" << BB->getName()
+          << "' into predecessor block '" << PredBB->getName()
+          << "' - it might create an irreducible loop!\n");
+    return false;
+  }
+  
+  unsigned DuplicationCost = getJumpThreadDuplicationCost(BB);
+  if (DuplicationCost > Threshold) {
+    DEBUG(errs() << "  Not duplicating BB '" << BB->getName()
+          << "' - Cost is too high: " << DuplicationCost << "\n");
+    return false;
+  }
+  
+  // Okay, we decided to do this!  Clone all the instructions in BB onto the end
+  // of PredBB.
+  DEBUG(errs() << "  Duplicating block '" << BB->getName() << "' into end of '"
+        << PredBB->getName() << "' to eliminate branch on phi.  Cost: "
+        << DuplicationCost << " block is:" << *BB << "\n");
+  
+  // We are going to have to map operands from the original BB block into the
+  // PredBB block.  Evaluate PHI nodes in BB.
+  DenseMap<Instruction*, Value*> ValueMapping;
+  
+  BasicBlock::iterator BI = BB->begin();
+  for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
+    ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
+  
+  BranchInst *OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
+  
+  // Clone the non-phi instructions of BB into PredBB, keeping track of the
+  // mapping and using it to remap operands in the cloned instructions.
+  for (; BI != BB->end(); ++BI) {
+    Instruction *New = BI->clone();
+    New->setName(BI->getName());
+    PredBB->getInstList().insert(OldPredBranch, New);
+    ValueMapping[BI] = New;
+    
+    // Remap operands to patch up intra-block references.
+    for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
+      if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
+        DenseMap<Instruction*, Value*>::iterator I = ValueMapping.find(Inst);
+        if (I != ValueMapping.end())
+          New->setOperand(i, I->second);
+      }
+  }
+  
+  // Check to see if the targets of the branch had PHI nodes. If so, we need to
+  // add entries to the PHI nodes for branch from PredBB now.
+  BranchInst *BBBranch = cast<BranchInst>(BB->getTerminator());
+  AddPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(0), BB, PredBB,
+                                  ValueMapping);
+  AddPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(1), BB, PredBB,
+                                  ValueMapping);
+  
+  // If there were values defined in BB that are used outside the block, then we
+  // now have to update all uses of the value to use either the original value,
+  // the cloned value, or some PHI derived value.  This can require arbitrary
+  // PHI insertion, of which we are prepared to do, clean these up now.
+  SSAUpdater SSAUpdate;
+  SmallVector<Use*, 16> UsesToRename;
+  for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
+    // Scan all uses of this instruction to see if it is used outside of its
+    // block, and if so, record them in UsesToRename.
+    for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
+         ++UI) {
+      Instruction *User = cast<Instruction>(*UI);
+      if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
+        if (UserPN->getIncomingBlock(UI) == BB)
+          continue;
+      } else if (User->getParent() == BB)
+        continue;
+      
+      UsesToRename.push_back(&UI.getUse());
+    }
+    
+    // If there are no uses outside the block, we're done with this instruction.
+    if (UsesToRename.empty())
+      continue;
+    
+    DEBUG(errs() << "JT: Renaming non-local uses of: " << *I << "\n");
+    
+    // We found a use of I outside of BB.  Rename all uses of I that are outside
+    // its block to be uses of the appropriate PHI node etc.  See ValuesInBlocks
+    // with the two values we know.
+    SSAUpdate.Initialize(I);
+    SSAUpdate.AddAvailableValue(BB, I);
+    SSAUpdate.AddAvailableValue(PredBB, ValueMapping[I]);
+    
+    while (!UsesToRename.empty())
+      SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
+    DEBUG(errs() << "\n");
+  }
+  
+  // PredBB no longer jumps to BB, remove entries in the PHI node for the edge
+  // that we nuked.
+  BB->removePredecessor(PredBB);
+  
+  // Remove the unconditional branch at the end of the PredBB block.
+  OldPredBranch->eraseFromParent();
+  
+  ++NumDupes;
+  return true;
+}
+
+
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index d6daeca..756fbf3 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -35,8 +35,8 @@
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
+#include "llvm/IntrinsicInst.h"
 #include "llvm/Instructions.h"
-#include "llvm/LLVMContext.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopPass.h"
@@ -46,8 +46,8 @@
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
 #include "llvm/Support/CFG.h"
-#include "llvm/Support/Compiler.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/ADT/Statistic.h"
 #include <algorithm>
@@ -73,7 +73,7 @@ EnableLICMConstantMotion("enable-licm-constant-variables", cl::Hidden,
                                   "global variables"));
 
 namespace {
-  struct VISIBILITY_HIDDEN LICM : public LoopPass {
+  struct LICM : public LoopPass {
     static char ID; // Pass identification, replacement for typeid
     LICM() : LoopPass(&ID) {}
 
@@ -91,6 +91,7 @@ namespace {
       AU.addRequired<AliasAnalysis>();
       AU.addPreserved<ScalarEvolution>();
       AU.addPreserved<DominanceFrontier>();
+      AU.addPreservedID(LoopSimplifyID);
     }
 
     bool doFinalization() {
@@ -338,7 +339,6 @@ void LICM::SinkRegion(DomTreeNode *N) {
   }
 }
 
-
 /// HoistRegion - Walk the specified region of the CFG (defined by all blocks
 /// dominated by the specified block, and that are in the current loop) in depth
 /// first order w.r.t the DominatorTree.  This allows us to visit definitions
@@ -389,9 +389,13 @@ bool LICM::canSinkOrHoistInst(Instruction &I) {
     // Don't hoist loads which have may-aliased stores in loop.
     unsigned Size = 0;
     if (LI->getType()->isSized())
-      Size = AA->getTargetData().getTypeStoreSize(LI->getType());
+      Size = AA->getTypeStoreSize(LI->getType());
     return !pointerInvalidatedByLoop(LI->getOperand(0), Size);
   } else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
+    if (isa<DbgStopPointInst>(CI)) {
+      // Don't hoist/sink dbgstoppoints, we handle them separately
+      return false;
+    }
     // Handle obvious cases efficiently.
     AliasAnalysis::ModRefBehavior Behavior = AA->getModRefBehavior(CI);
     if (Behavior == AliasAnalysis::DoesNotAccessMemory)
@@ -465,7 +469,7 @@ bool LICM::isLoopInvariantInst(Instruction &I) {
 /// position, and may either delete it or move it to outside of the loop.
 ///
 void LICM::sink(Instruction &I) {
-  DOUT << "LICM sinking instruction: " << I;
+  DEBUG(errs() << "LICM sinking instruction: " << I);
 
   SmallVector<BasicBlock*, 8> ExitBlocks;
   CurLoop->getExitBlocks(ExitBlocks);
@@ -482,22 +486,27 @@ void LICM::sink(Instruction &I) {
     if (!isExitBlockDominatedByBlockInLoop(ExitBlocks[0], I.getParent())) {
       // Instruction is not used, just delete it.
       CurAST->deleteValue(&I);
-      if (!I.use_empty())  // If I has users in unreachable blocks, eliminate.
-        I.replaceAllUsesWith(Context->getUndef(I.getType()));
+      // If I has users in unreachable blocks, eliminate.
+      // If I is not void type then replaceAllUsesWith undef.
+      // This allows ValueHandlers and custom metadata to adjust itself.
+      if (!I.getType()->isVoidTy())
+        I.replaceAllUsesWith(UndefValue::get(I.getType()));
       I.eraseFromParent();
     } else {
       // Move the instruction to the start of the exit block, after any PHI
       // nodes in it.
       I.removeFromParent();
-
       BasicBlock::iterator InsertPt = ExitBlocks[0]->getFirstNonPHI();
       ExitBlocks[0]->getInstList().insert(InsertPt, &I);
     }
   } else if (ExitBlocks.empty()) {
     // The instruction is actually dead if there ARE NO exit blocks.
     CurAST->deleteValue(&I);
-    if (!I.use_empty())  // If I has users in unreachable blocks, eliminate.
-      I.replaceAllUsesWith(Context->getUndef(I.getType()));
+    // If I has users in unreachable blocks, eliminate.
+    // If I is not void type then replaceAllUsesWith undef.
+    // This allows ValueHandlers and custom metadata to adjust itself.
+    if (!I.getType()->isVoidTy())
+      I.replaceAllUsesWith(UndefValue::get(I.getType()));
     I.eraseFromParent();
   } else {
     // Otherwise, if we have multiple exits, use the PromoteMem2Reg function to
@@ -507,7 +516,7 @@ void LICM::sink(Instruction &I) {
     // Firstly, we create a stack object to hold the value...
     AllocaInst *AI = 0;
 
-    if (I.getType() != Type::VoidTy) {
+    if (!I.getType()->isVoidTy()) {
       AI = new AllocaInst(I.getType(), 0, I.getName(),
                           I.getParent()->getParent()->getEntryBlock().begin());
       CurAST->add(AI);
@@ -593,7 +602,7 @@ void LICM::sink(Instruction &I) {
     if (AI) {
       std::vector<AllocaInst*> Allocas;
       Allocas.push_back(AI);
-      PromoteMemToReg(Allocas, *DT, *DF, CurAST);
+      PromoteMemToReg(Allocas, *DT, *DF, AI->getContext(), CurAST);
     }
   }
 }
@@ -602,7 +611,8 @@ void LICM::sink(Instruction &I) {
 /// that is safe to hoist, this instruction is called to do the dirty work.
 ///
 void LICM::hoist(Instruction &I) {
-  DOUT << "LICM hoisting to " << Preheader->getName() << ": " << I;
+  DEBUG(errs() << "LICM hoisting to " << Preheader->getName() << ": "
+        << I << "\n");
 
   // Remove the instruction from its current basic block... but don't delete the
   // instruction.
@@ -623,7 +633,8 @@ void LICM::hoist(Instruction &I) {
 ///
 bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) {
   // If it is not a trapping instruction, it is always safe to hoist.
-  if (!Inst.isTrapping()) return true;
+  if (Inst.isSafeToSpeculativelyExecute())
+    return true;
 
   // Otherwise we have to check to make sure that the instruction dominates all
   // of the exit blocks.  If it doesn't, then there is a path out of the loop
@@ -635,12 +646,6 @@ bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) {
   if (Inst.getParent() == CurLoop->getHeader())
     return true;
 
-  // It's always safe to load from a global or alloca.
-  if (isa<LoadInst>(Inst))
-    if (isa<AllocationInst>(Inst.getOperand(0)) ||
-        isa<GlobalVariable>(Inst.getOperand(0)))
-      return true;
-
   // Get the exit blocks for the current loop.
   SmallVector<BasicBlock*, 8> ExitBlocks;
   CurLoop->getExitBlocks(ExitBlocks);
@@ -773,7 +778,7 @@ void LICM::PromoteValuesInLoop() {
   PromotedAllocas.reserve(PromotedValues.size());
   for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i)
     PromotedAllocas.push_back(PromotedValues[i].first);
-  PromoteMemToReg(PromotedAllocas, *DT, *DF, CurAST);
+  PromoteMemToReg(PromotedAllocas, *DT, *DF, Preheader->getContext(), CurAST);
 }
 
 /// FindPromotableValuesInLoop - Check the current loop for stores to definite
@@ -862,7 +867,7 @@ void LICM::FindPromotableValuesInLoop(
     for (AliasSet::iterator I = AS.begin(), E = AS.end(); I != E; ++I)
       ValueToAllocaMap.insert(std::make_pair(I->getValue(), AI));
 
-    DOUT << "LICM: Promoting value: " << *V << "\n";
+    DEBUG(errs() << "LICM: Promoting value: " << *V << "\n");
   }
 }
 
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index 302cdec..5f93756 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -15,19 +15,17 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "loop-delete"
-
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/SmallVector.h"
-
 using namespace llvm;
 
 STATISTIC(NumDeleted, "Number of loops deleted");
 
 namespace {
-  class VISIBILITY_HIDDEN LoopDeletion : public LoopPass {
+  class LoopDeletion : public LoopPass {
   public:
     static char ID; // Pass ID, replacement for typeid
     LoopDeletion() : LoopPass(&ID) {}
@@ -38,9 +36,9 @@ namespace {
     bool SingleDominatingExit(Loop* L,
                               SmallVector<BasicBlock*, 4>& exitingBlocks);
     bool IsLoopDead(Loop* L, SmallVector<BasicBlock*, 4>& exitingBlocks,
-                    SmallVector<BasicBlock*, 4>& exitBlocks);
-    bool IsLoopInvariantInst(Instruction *I, Loop* L);
-    
+                    SmallVector<BasicBlock*, 4>& exitBlocks,
+                    bool &Changed, BasicBlock *Preheader);
+
     virtual void getAnalysisUsage(AnalysisUsage& AU) const {
       AU.addRequired<ScalarEvolution>();
       AU.addRequired<DominatorTree>();
@@ -84,32 +82,13 @@ bool LoopDeletion::SingleDominatingExit(Loop* L,
   return DT.dominates(exitingBlocks[0], latch);
 }
 
-/// IsLoopInvariantInst - Checks if an instruction is invariant with respect to
-/// a loop, which is defined as being true if all of its operands are defined
-/// outside of the loop.  These instructions can be hoisted out of the loop
-/// if their results are needed.  This could be made more aggressive by
-/// recursively checking the operands for invariance, but it's not clear that
-/// it's worth it.
-bool LoopDeletion::IsLoopInvariantInst(Instruction *I, Loop* L)  {
-  // PHI nodes are not loop invariant if defined in  the loop.
-  if (isa<PHINode>(I) && L->contains(I->getParent()))
-    return false;
-    
-  // The instruction is loop invariant if all of its operands are loop-invariant
-  for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
-    if (!L->isLoopInvariant(I->getOperand(i)))
-      return false;
-  
-  // If we got this far, the instruction is loop invariant!
-  return true;
-}
-
 /// IsLoopDead - Determined if a loop is dead.  This assumes that we've already
 /// checked for unique exit and exiting blocks, and that the code is in LCSSA
 /// form.
 bool LoopDeletion::IsLoopDead(Loop* L,
                               SmallVector<BasicBlock*, 4>& exitingBlocks,
-                              SmallVector<BasicBlock*, 4>& exitBlocks) {
+                              SmallVector<BasicBlock*, 4>& exitBlocks,
+                              bool &Changed, BasicBlock *Preheader) {
   BasicBlock* exitingBlock = exitingBlocks[0];
   BasicBlock* exitBlock = exitBlocks[0];
   
@@ -122,7 +101,7 @@ bool LoopDeletion::IsLoopDead(Loop* L,
   while (PHINode* P = dyn_cast<PHINode>(BI)) {
     Value* incoming = P->getIncomingValueForBlock(exitingBlock);
     if (Instruction* I = dyn_cast<Instruction>(incoming))
-      if (!IsLoopInvariantInst(I, L))
+      if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator()))
         return false;
       
     BI++;
@@ -181,15 +160,16 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
     return false;
   
   // Finally, we have to check that the loop really is dead.
-  if (!IsLoopDead(L, exitingBlocks, exitBlocks))
-    return false;
+  bool Changed = false;
+  if (!IsLoopDead(L, exitingBlocks, exitBlocks, Changed, preheader))
+    return Changed;
   
   // Don't remove loops for which we can't solve the trip count.
   // They could be infinite, in which case we'd be changing program behavior.
   ScalarEvolution& SE = getAnalysis<ScalarEvolution>();
-  const SCEV* S = SE.getBackedgeTakenCount(L);
+  const SCEV *S = SE.getBackedgeTakenCount(L);
   if (isa<SCEVCouldNotCompute>(S))
-    return false;
+    return Changed;
   
   // Now that we know the removal is safe, remove the loop by changing the
   // branch from the preheader to go to the single exit block.  
@@ -199,18 +179,12 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
   // Because we're deleting a large chunk of code at once, the sequence in which
   // we remove things is very important to avoid invalidation issues.  Don't
   // mess with this unless you have good reason and know what you're doing.
-  
-  // Move simple loop-invariant expressions out of the loop, since they
-  // might be needed by the exit phis.
-  for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
-       LI != LE; ++LI)
-    for (BasicBlock::iterator BI = (*LI)->begin(), BE = (*LI)->end();
-         BI != BE; ) {
-      Instruction* I = BI++;
-      if (!I->use_empty() && IsLoopInvariantInst(I, L))
-        I->moveBefore(preheader->getTerminator());
-    }
-  
+
+  // Tell ScalarEvolution that the loop is deleted. Do this before
+  // deleting the loop so that ScalarEvolution can look at the loop
+  // to determine what it needs to clean up.
+  SE.forgetLoopBackedgeTakenCount(L);
+
   // Connect the preheader directly to the exit block.
   TerminatorInst* TI = preheader->getTerminator();
   TI->replaceUsesOfWith(L->getHeader(), exitBlock);
@@ -248,11 +222,6 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
     (*LI)->dropAllReferences();
   }
   
-  // Tell ScalarEvolution that the loop is deleted. Do this before
-  // deleting the loop so that ScalarEvolution can look at the loop
-  // to determine what it needs to clean up.
-  SE.forgetLoopBackedgeTakenCount(L);
-
   // Erase the instructions and the blocks without having to worry
   // about ordering because we already dropped the references.
   // NOTE: This iteration is safe because erasing the block does not remove its
@@ -273,8 +242,9 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
   // The last step is to inform the loop pass manager that we've
   // eliminated this loop.
   LPM.deleteLoopFromQueue(L);
+  Changed = true;
   
   NumDeleted++;
   
-  return true;
+  return Changed;
 }
diff --git a/lib/Transforms/Scalar/LoopIndexSplit.cpp b/lib/Transforms/Scalar/LoopIndexSplit.cpp
index 38e3a8b..5f9d370 100644
--- a/lib/Transforms/Scalar/LoopIndexSplit.cpp
+++ b/lib/Transforms/Scalar/LoopIndexSplit.cpp
@@ -51,7 +51,6 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "loop-index-split"
-
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/LLVMContext.h"
@@ -61,7 +60,6 @@
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Support/Compiler.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/Statistic.h"
 
@@ -73,8 +71,7 @@ STATISTIC(NumRestrictBounds, "Number of loop iteration space restricted");
 
 namespace {
 
-  class VISIBILITY_HIDDEN LoopIndexSplit : public LoopPass {
-
+  class LoopIndexSplit : public LoopPass {
   public:
     static char ID; // Pass ID, replacement for typeid
     LoopIndexSplit() : LoopPass(&ID) {}
@@ -294,31 +291,33 @@ static bool isUsedOutsideLoop(Value *V, Loop *L) {
 
 // Return V+1
 static Value *getPlusOne(Value *V, bool Sign, Instruction *InsertPt, 
-                         LLVMContext* Context) {
-  Constant *One = Context->getConstantInt(V->getType(), 1, Sign);
+                         LLVMContext &Context) {
+  Constant *One = ConstantInt::get(V->getType(), 1, Sign);
   return BinaryOperator::CreateAdd(V, One, "lsp", InsertPt);
 }
 
 // Return V-1
 static Value *getMinusOne(Value *V, bool Sign, Instruction *InsertPt,
-                          LLVMContext* Context) {
-  Constant *One = Context->getConstantInt(V->getType(), 1, Sign);
+                          LLVMContext &Context) {
+  Constant *One = ConstantInt::get(V->getType(), 1, Sign);
   return BinaryOperator::CreateSub(V, One, "lsp", InsertPt);
 }
 
 // Return min(V1, V1)
 static Value *getMin(Value *V1, Value *V2, bool Sign, Instruction *InsertPt) {
  
-  Value *C = new ICmpInst(Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
-                          V1, V2, "lsp", InsertPt);
+  Value *C = new ICmpInst(InsertPt,
+                          Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
+                          V1, V2, "lsp");
   return SelectInst::Create(C, V1, V2, "lsp", InsertPt);
 }
 
 // Return max(V1, V2)
 static Value *getMax(Value *V1, Value *V2, bool Sign, Instruction *InsertPt) {
  
-  Value *C = new ICmpInst(Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
-                          V1, V2, "lsp", InsertPt);
+  Value *C = new ICmpInst(InsertPt, 
+                          Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
+                          V1, V2, "lsp");
   return SelectInst::Create(C, V2, V1, "lsp", InsertPt);
 }
 
@@ -427,15 +426,15 @@ bool LoopIndexSplit::processOneIterationLoop() {
   //      c1 = icmp uge i32 SplitValue, StartValue
   //      c2 = icmp ult i32 SplitValue, ExitValue
   //      and i32 c1, c2 
-  Instruction *C1 = new ICmpInst(ExitCondition->isSignedPredicate() ? 
+  Instruction *C1 = new ICmpInst(BR, ExitCondition->isSignedPredicate() ? 
                                  ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE,
-                                 SplitValue, StartValue, "lisplit", BR);
+                                 SplitValue, StartValue, "lisplit");
 
   CmpInst::Predicate C2P  = ExitCondition->getPredicate();
   BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
-  if (LatchBR->getOperand(0) != Header)
+  if (LatchBR->getOperand(1) != Header)
     C2P = CmpInst::getInversePredicate(C2P);
-  Instruction *C2 = new ICmpInst(C2P, SplitValue, ExitValue, "lisplit", BR);
+  Instruction *C2 = new ICmpInst(BR, C2P, SplitValue, ExitValue, "lisplit");
   Instruction *NSplitCond = BinaryOperator::CreateAnd(C1, C2, "lisplit", BR);
 
   SplitCondition->replaceAllUsesWith(NSplitCond);
@@ -491,6 +490,8 @@ bool LoopIndexSplit::restrictLoopBound(ICmpInst &Op) {
     EBR->setSuccessor(1, T);
   }
 
+  LLVMContext &Context = Op.getContext();
+
   // New upper and lower bounds.
   Value *NLB = NULL;
   Value *NUB = NULL;
@@ -698,7 +699,8 @@ void LoopIndexSplit::removeBlocks(BasicBlock *DeadBB, Loop *LP,
          E = df_end(DN); DI != E; ++DI) {
     BasicBlock *BB = DI->getBlock();
     WorkList.push_back(BB);
-    BB->replaceAllUsesWith(UndefValue::get(Type::LabelTy));
+    BB->replaceAllUsesWith(UndefValue::get(
+                                       Type::getLabelTy(DeadBB->getContext())));
   }
 
   while (!WorkList.empty()) {
@@ -877,6 +879,8 @@ bool LoopIndexSplit::splitLoop() {
   BasicBlock *ExitingBlock = ExitCondition->getParent();
   if (!cleanBlock(ExitingBlock)) return false;
 
+  LLVMContext &Context = Header->getContext();
+
   for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
        I != E; ++I) {
     BranchInst *BR = dyn_cast<BranchInst>((*I)->getTerminator());
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index 1f7892a..70c69bb 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -32,7 +32,7 @@ using namespace llvm;
 STATISTIC(NumRotated, "Number of loops rotated");
 namespace {
 
-  class VISIBILITY_HIDDEN RenameData {
+  class RenameData {
   public:
     RenameData(Instruction *O, Value *P, Instruction *H) 
       : Original(O), PreHeader(P), Header(H) { }
@@ -42,8 +42,7 @@ namespace {
     Instruction *Header; // New header replacement
   };
   
-  class VISIBILITY_HIDDEN LoopRotate : public LoopPass {
-
+  class LoopRotate : public LoopPass {
   public:
     static char ID; // Pass ID, replacement for typeid
     LoopRotate() : LoopPass(&ID) {}
@@ -178,6 +177,11 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
 
   // Now, this loop is suitable for rotation.
 
+  // Anything ScalarEvolution may know about this loop or the PHI nodes
+  // in its header will soon be invalidated.
+  if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>())
+    SE->forgetLoopBackedgeTakenCount(L);
+
   // Find new Loop header. NewHeader is a Header's one and only successor
   // that is inside loop.  Header's other successor is outside the
   // loop.  Otherwise loop is not suitable for rotation.
@@ -435,7 +439,8 @@ void LoopRotate::preserveCanonicalLoopForm(LPPassManager &LPM) {
   // Right now original pre-header has two successors, new header and
   // exit block. Insert new block between original pre-header and
   // new header such that loop's new pre-header has only one successor.
-  BasicBlock *NewPreHeader = BasicBlock::Create("bb.nph",
+  BasicBlock *NewPreHeader = BasicBlock::Create(OrigHeader->getContext(),
+                                                "bb.nph",
                                                 OrigHeader->getParent(), 
                                                 NewHeader);
   LoopInfo &LI = LPM.getAnalysis<LoopInfo>();
@@ -511,26 +516,30 @@ void LoopRotate::preserveCanonicalLoopForm(LPPassManager &LPM) {
       DF->addBasicBlock(L->getHeader(), LatchSet);
     }
 
-    // If a loop block dominates new loop latch then its frontier is
-    // new header and Exit.
+    // If a loop block dominates new loop latch then add to its frontiers
+    // new header and Exit and remove new latch (which is equal to original
+    // header).
     BasicBlock *NewLatch = L->getLoopLatch();
-    DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
-    for (Loop::block_iterator BI = L->block_begin(), BE = L->block_end();
-         BI != BE; ++BI) {
-      BasicBlock *B = *BI;
-      if (DT->dominates(B, NewLatch)) {
-        DominanceFrontier::iterator BDFI = DF->find(B);
-        if (BDFI != DF->end()) {
-          DominanceFrontier::DomSetType &BSet = BDFI->second;
-          BSet = BDFI->second;
-          BSet.clear();
-          BSet.insert(L->getHeader());
-          BSet.insert(Exit);
-        } else {
-          DominanceFrontier::DomSetType BSet;
-          BSet.insert(L->getHeader());
-          BSet.insert(Exit);
-          DF->addBasicBlock(B, BSet);
+
+    assert(NewLatch == OrigHeader && "NewLatch is inequal to OrigHeader");
+
+    if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) {
+      for (Loop::block_iterator BI = L->block_begin(), BE = L->block_end();
+           BI != BE; ++BI) {
+        BasicBlock *B = *BI;
+        if (DT->dominates(B, NewLatch)) {
+          DominanceFrontier::iterator BDFI = DF->find(B);
+          if (BDFI != DF->end()) {
+            DominanceFrontier::DomSetType &BSet = BDFI->second;
+            BSet.erase(NewLatch);
+            BSet.insert(L->getHeader());
+            BSet.insert(Exit);
+          } else {
+            DominanceFrontier::DomSetType BSet;
+            BSet.insert(L->getHeader());
+            BSet.insert(Exit);
+            DF->addBasicBlock(B, BSet);
+          }
         }
       }
     }
@@ -538,22 +547,7 @@ void LoopRotate::preserveCanonicalLoopForm(LPPassManager &LPM) {
 
   // Preserve canonical loop form, which means Exit block should
   // have only one predecessor.
-  BasicBlock *NExit = SplitEdge(L->getLoopLatch(), Exit, this);
-
-  // Preserve LCSSA.
-  for (BasicBlock::iterator I = Exit->begin();
-       (PN = dyn_cast<PHINode>(I)); ++I) {
-    unsigned N = PN->getNumIncomingValues();
-    for (unsigned index = 0; index != N; ++index)
-      if (PN->getIncomingBlock(index) == NExit) {
-        PHINode *NewPN = PHINode::Create(PN->getType(), PN->getName(),
-                                         NExit->begin());
-        NewPN->addIncoming(PN->getIncomingValue(index), L->getLoopLatch());
-        PN->setIncomingValue(index, NewPN);
-        PN->setIncomingBlock(index, NExit);
-        break;
-      }
-  }
+  SplitEdge(L->getLoopLatch(), Exit, this);
 
   assert(NewHeader && L->getHeader() == NewHeader &&
          "Invalid loop header after loop rotation");
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 046fed3..d8f6cc1 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -24,7 +24,6 @@
 #include "llvm/Constants.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
 #include "llvm/Type.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Analysis/Dominators.h"
@@ -38,9 +37,9 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/Compiler.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetLowering.h"
 #include <algorithm>
 using namespace llvm;
@@ -64,26 +63,26 @@ namespace {
   /// IVInfo - This structure keeps track of one IV expression inserted during
   /// StrengthReduceStridedIVUsers. It contains the stride, the common base, as
   /// well as the PHI node and increment value created for rewrite.
-  struct VISIBILITY_HIDDEN IVExpr {
-    const SCEV*  Stride;
-    const SCEV*  Base;
+  struct IVExpr {
+    const SCEV *Stride;
+    const SCEV *Base;
     PHINode    *PHI;
 
-    IVExpr(const SCEV* const stride, const SCEV* const base, PHINode *phi)
+    IVExpr(const SCEV *const stride, const SCEV *const base, PHINode *phi)
       : Stride(stride), Base(base), PHI(phi) {}
   };
 
   /// IVsOfOneStride - This structure keeps track of all IV expression inserted
   /// during StrengthReduceStridedIVUsers for a particular stride of the IV.
-  struct VISIBILITY_HIDDEN IVsOfOneStride {
+  struct IVsOfOneStride {
     std::vector<IVExpr> IVs;
 
-    void addIV(const SCEV* const Stride, const SCEV* const Base, PHINode *PHI) {
+    void addIV(const SCEV *const Stride, const SCEV *const Base, PHINode *PHI) {
       IVs.push_back(IVExpr(Stride, Base, PHI));
     }
   };
 
-  class VISIBILITY_HIDDEN LoopStrengthReduce : public LoopPass {
+  class LoopStrengthReduce : public LoopPass {
     IVUsers *IU;
     LoopInfo *LI;
     DominatorTree *DT;
@@ -92,11 +91,11 @@ namespace {
 
     /// IVsByStride - Keep track of all IVs that have been inserted for a
     /// particular stride.
-    std::map<const SCEV*, IVsOfOneStride> IVsByStride;
+    std::map<const SCEV *, IVsOfOneStride> IVsByStride;
 
     /// StrideNoReuse - Keep track of all the strides whose ivs cannot be
     /// reused (nor should they be rewritten to reuse other strides).
-    SmallSet<const SCEV*, 4> StrideNoReuse;
+    SmallSet<const SCEV *, 4> StrideNoReuse;
 
     /// DeadInsts - Keep track of instructions we may have made dead, so that
     /// we can remove them after we are done working.
@@ -134,7 +133,7 @@ namespace {
   private:
     ICmpInst *ChangeCompareStride(Loop *L, ICmpInst *Cond,
                                   IVStrideUse* &CondUse,
-                                  const SCEV* const *  &CondStride);
+                                  const SCEV *const *  &CondStride);
 
     void OptimizeIndvars(Loop *L);
     void OptimizeLoopCountIV(Loop *L);
@@ -150,16 +149,16 @@ namespace {
                           IVStrideUse* &CondUse);
 
     bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse,
-                           const SCEV* const * &CondStride);
+                           const SCEV *const * &CondStride);
     bool RequiresTypeConversion(const Type *Ty, const Type *NewTy);
-    const SCEV* CheckForIVReuse(bool, bool, bool, const SCEV* const&,
+    const SCEV *CheckForIVReuse(bool, bool, bool, const SCEV *const&,
                              IVExpr&, const Type*,
                              const std::vector<BasedUser>& UsersToProcess);
     bool ValidScale(bool, int64_t,
                     const std::vector<BasedUser>& UsersToProcess);
     bool ValidOffset(bool, int64_t, int64_t,
                      const std::vector<BasedUser>& UsersToProcess);
-    const SCEV* CollectIVUsers(const SCEV* const &Stride,
+    const SCEV *CollectIVUsers(const SCEV *const &Stride,
                               IVUsersOfOneStride &Uses,
                               Loop *L,
                               bool &AllUsesAreAddresses,
@@ -169,11 +168,11 @@ namespace {
                                 const std::vector<BasedUser> &UsersToProcess,
                                 const Loop *L,
                                 bool AllUsesAreAddresses,
-                                const SCEV* Stride);
+                                const SCEV *Stride);
     void PrepareToStrengthReduceFully(
                              std::vector<BasedUser> &UsersToProcess,
-                             const SCEV* Stride,
-                             const SCEV* CommonExprs,
+                             const SCEV *Stride,
+                             const SCEV *CommonExprs,
                              const Loop *L,
                              SCEVExpander &PreheaderRewriter);
     void PrepareToStrengthReduceFromSmallerStride(
@@ -183,13 +182,13 @@ namespace {
                                          Instruction *PreInsertPt);
     void PrepareToStrengthReduceWithNewPhi(
                                   std::vector<BasedUser> &UsersToProcess,
-                                  const SCEV* Stride,
-                                  const SCEV* CommonExprs,
+                                  const SCEV *Stride,
+                                  const SCEV *CommonExprs,
                                   Value *CommonBaseV,
                                   Instruction *IVIncInsertPt,
                                   const Loop *L,
                                   SCEVExpander &PreheaderRewriter);
-    void StrengthReduceStridedIVUsers(const SCEV* const &Stride,
+    void StrengthReduceStridedIVUsers(const SCEV *const &Stride,
                                       IVUsersOfOneStride &Uses,
                                       Loop *L);
     void DeleteTriviallyDeadInstructions();
@@ -233,7 +232,7 @@ void LoopStrengthReduce::DeleteTriviallyDeadInstructions() {
 /// containsAddRecFromDifferentLoop - Determine whether expression S involves a 
 /// subexpression that is an AddRec from a loop other than L.  An outer loop 
 /// of L is OK, but not an inner loop nor a disjoint loop.
-static bool containsAddRecFromDifferentLoop(const SCEV* S, Loop *L) {
+static bool containsAddRecFromDifferentLoop(const SCEV *S, Loop *L) {
   // This is very common, put it first.
   if (isa<SCEVConstant>(S))
     return false;
@@ -248,7 +247,7 @@ static bool containsAddRecFromDifferentLoop(const SCEV* S, Loop *L) {
       if (newLoop == L)
         return false;
       // if newLoop is an outer loop of L, this is OK.
-      if (!LoopInfoBase<BasicBlock>::isNotAlreadyContainedIn(L, newLoop))
+      if (!LoopInfo::isNotAlreadyContainedIn(L, newLoop))
         return false;
     }
     return true;
@@ -328,7 +327,7 @@ namespace {
     /// this use.  As the use is processed, information gets moved from this
     /// field to the Imm field (below).  BasedUser values are sorted by this
     /// field.
-    const SCEV* Base;
+    const SCEV *Base;
     
     /// Inst - The instruction using the induction variable.
     Instruction *Inst;
@@ -341,7 +340,7 @@ namespace {
     /// before Inst, because it will be folded into the imm field of the
     /// instruction.  This is also sometimes used for loop-variant values that
     /// must be added inside the loop.
-    const SCEV* Imm;
+    const SCEV *Imm;
 
     /// Phi - The induction variable that performs the striding that
     /// should be used for this user.
@@ -363,13 +362,13 @@ namespace {
     // Once we rewrite the code to insert the new IVs we want, update the
     // operands of Inst to use the new expression 'NewBase', with 'Imm' added
     // to it.
-    void RewriteInstructionToUseNewBase(const SCEV* const &NewBase,
+    void RewriteInstructionToUseNewBase(const SCEV *const &NewBase,
                                         Instruction *InsertPt,
                                        SCEVExpander &Rewriter, Loop *L, Pass *P,
                                         LoopInfo &LI,
                                         SmallVectorImpl<WeakVH> &DeadInsts);
     
-    Value *InsertCodeForBaseAtPosition(const SCEV* const &NewBase, 
+    Value *InsertCodeForBaseAtPosition(const SCEV *const &NewBase, 
                                        const Type *Ty,
                                        SCEVExpander &Rewriter,
                                        Instruction *IP, Loop *L,
@@ -379,12 +378,12 @@ namespace {
 }
 
 void BasedUser::dump() const {
-  cerr << " Base=" << *Base;
-  cerr << " Imm=" << *Imm;
-  cerr << "   Inst: " << *Inst;
+  errs() << " Base=" << *Base;
+  errs() << " Imm=" << *Imm;
+  errs() << "   Inst: " << *Inst;
 }
 
-Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV* const &NewBase, 
+Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase, 
                                               const Type *Ty,
                                               SCEVExpander &Rewriter,
                                               Instruction *IP, Loop *L,
@@ -408,7 +407,7 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV* const &NewBase,
   
   Value *Base = Rewriter.expandCodeFor(NewBase, 0, BaseInsertPt);
 
-  const SCEV* NewValSCEV = SE->getUnknown(Base);
+  const SCEV *NewValSCEV = SE->getUnknown(Base);
 
   // Always emit the immediate into the same block as the user.
   NewValSCEV = SE->getAddExpr(NewValSCEV, Imm);
@@ -423,7 +422,7 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV* const &NewBase,
 // value of NewBase in the case that it's a diffferent instruction from
 // the PHI that NewBase is computed from, or null otherwise.
 //
-void BasedUser::RewriteInstructionToUseNewBase(const SCEV* const &NewBase,
+void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase,
                                                Instruction *NewBasePt,
                                       SCEVExpander &Rewriter, Loop *L, Pass *P,
                                       LoopInfo &LI,
@@ -460,9 +459,10 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV* const &NewBase,
     // Replace the use of the operand Value with the new Phi we just created.
     Inst->replaceUsesOfWith(OperandValToReplace, NewVal);
 
-    DOUT << "      Replacing with ";
-    DEBUG(WriteAsOperand(*DOUT, NewVal, /*PrintType=*/false));
-    DOUT << ", which has value " << *NewBase << " plus IMM " << *Imm << "\n";
+    DEBUG(errs() << "      Replacing with ");
+    DEBUG(WriteAsOperand(errs(), NewVal, /*PrintType=*/false));
+    DEBUG(errs() << ", which has value " << *NewBase << " plus IMM "
+                 << *Imm << "\n");
     return;
   }
 
@@ -483,43 +483,45 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV* const &NewBase,
       // loop because multiple copies sometimes do useful sinking of code in
       // that case(?).
       Instruction *OldLoc = dyn_cast<Instruction>(OperandValToReplace);
+      BasicBlock *PHIPred = PN->getIncomingBlock(i);
       if (L->contains(OldLoc->getParent())) {
         // If this is a critical edge, split the edge so that we do not insert
         // the code on all predecessor/successor paths.  We do this unless this
         // is the canonical backedge for this loop, as this can make some
         // inserted code be in an illegal position.
-        BasicBlock *PHIPred = PN->getIncomingBlock(i);
         if (e != 1 && PHIPred->getTerminator()->getNumSuccessors() > 1 &&
             (PN->getParent() != L->getHeader() || !L->contains(PHIPred))) {
 
           // First step, split the critical edge.
-          SplitCriticalEdge(PHIPred, PN->getParent(), P, false);
+          BasicBlock *NewBB = SplitCriticalEdge(PHIPred, PN->getParent(),
+                                                P, false);
 
           // Next step: move the basic block.  In particular, if the PHI node
           // is outside of the loop, and PredTI is in the loop, we want to
           // move the block to be immediately before the PHI block, not
           // immediately after PredTI.
-          if (L->contains(PHIPred) && !L->contains(PN->getParent())) {
-            BasicBlock *NewBB = PN->getIncomingBlock(i);
+          if (L->contains(PHIPred) && !L->contains(PN->getParent()))
             NewBB->moveBefore(PN->getParent());
-          }
 
           // Splitting the edge can reduce the number of PHI entries we have.
           e = PN->getNumIncomingValues();
+          PHIPred = NewBB;
+          i = PN->getBasicBlockIndex(PHIPred);
         }
       }
-      Value *&Code = InsertedCode[PN->getIncomingBlock(i)];
+      Value *&Code = InsertedCode[PHIPred];
       if (!Code) {
         // Insert the code into the end of the predecessor block.
         Instruction *InsertPt = (L->contains(OldLoc->getParent())) ?
-                                PN->getIncomingBlock(i)->getTerminator() :
+                                PHIPred->getTerminator() :
                                 OldLoc->getParent()->getTerminator();
         Code = InsertCodeForBaseAtPosition(NewBase, PN->getType(),
                                            Rewriter, InsertPt, L, LI);
 
-        DOUT << "      Changing PHI use to ";
-        DEBUG(WriteAsOperand(*DOUT, Code, /*PrintType=*/false));
-        DOUT << ", which has value " << *NewBase << " plus IMM " << *Imm << "\n";
+        DEBUG(errs() << "      Changing PHI use to ");
+        DEBUG(WriteAsOperand(errs(), Code, /*PrintType=*/false));
+        DEBUG(errs() << ", which has value " << *NewBase << " plus IMM "
+                     << *Imm << "\n");
       }
 
       // Replace the use of the operand Value with the new Phi we just created.
@@ -535,7 +537,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV* const &NewBase,
 
 /// fitsInAddressMode - Return true if V can be subsumed within an addressing
 /// mode, and does not need to be put in a register first.
-static bool fitsInAddressMode(const SCEV* const &V, const Type *AccessTy,
+static bool fitsInAddressMode(const SCEV *const &V, const Type *AccessTy,
                              const TargetLowering *TLI, bool HasBaseReg) {
   if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(V)) {
     int64_t VC = SC->getValue()->getSExtValue();
@@ -567,12 +569,12 @@ static bool fitsInAddressMode(const SCEV* const &V, const Type *AccessTy,
 
 /// MoveLoopVariantsToImmediateField - Move any subexpressions from Val that are
 /// loop varying to the Imm operand.
-static void MoveLoopVariantsToImmediateField(const SCEV* &Val, const SCEV* &Imm,
+static void MoveLoopVariantsToImmediateField(const SCEV *&Val, const SCEV *&Imm,
                                              Loop *L, ScalarEvolution *SE) {
   if (Val->isLoopInvariant(L)) return;  // Nothing to do.
   
   if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) {
-    SmallVector<const SCEV*, 4> NewOps;
+    SmallVector<const SCEV *, 4> NewOps;
     NewOps.reserve(SAE->getNumOperands());
     
     for (unsigned i = 0; i != SAE->getNumOperands(); ++i)
@@ -590,10 +592,10 @@ static void MoveLoopVariantsToImmediateField(const SCEV* &Val, const SCEV* &Imm,
       Val = SE->getAddExpr(NewOps);
   } else if (const SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Val)) {
     // Try to pull immediates out of the start value of nested addrec's.
-    const SCEV* Start = SARE->getStart();
+    const SCEV *Start = SARE->getStart();
     MoveLoopVariantsToImmediateField(Start, Imm, L, SE);
     
-    SmallVector<const SCEV*, 4> Ops(SARE->op_begin(), SARE->op_end());
+    SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end());
     Ops[0] = Start;
     Val = SE->getAddRecExpr(Ops, SARE->getLoop());
   } else {
@@ -609,15 +611,15 @@ static void MoveLoopVariantsToImmediateField(const SCEV* &Val, const SCEV* &Imm,
 /// Accumulate these immediate values into the Imm value.
 static void MoveImmediateValues(const TargetLowering *TLI,
                                 const Type *AccessTy,
-                                const SCEV* &Val, const SCEV* &Imm,
+                                const SCEV *&Val, const SCEV *&Imm,
                                 bool isAddress, Loop *L,
                                 ScalarEvolution *SE) {
   if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) {
-    SmallVector<const SCEV*, 4> NewOps;
+    SmallVector<const SCEV *, 4> NewOps;
     NewOps.reserve(SAE->getNumOperands());
     
     for (unsigned i = 0; i != SAE->getNumOperands(); ++i) {
-      const SCEV* NewOp = SAE->getOperand(i);
+      const SCEV *NewOp = SAE->getOperand(i);
       MoveImmediateValues(TLI, AccessTy, NewOp, Imm, isAddress, L, SE);
       
       if (!NewOp->isLoopInvariant(L)) {
@@ -636,11 +638,11 @@ static void MoveImmediateValues(const TargetLowering *TLI,
     return;
   } else if (const SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Val)) {
     // Try to pull immediates out of the start value of nested addrec's.
-    const SCEV* Start = SARE->getStart();
+    const SCEV *Start = SARE->getStart();
     MoveImmediateValues(TLI, AccessTy, Start, Imm, isAddress, L, SE);
     
     if (Start != SARE->getStart()) {
-      SmallVector<const SCEV*, 4> Ops(SARE->op_begin(), SARE->op_end());
+      SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end());
       Ops[0] = Start;
       Val = SE->getAddRecExpr(Ops, SARE->getLoop());
     }
@@ -651,8 +653,8 @@ static void MoveImmediateValues(const TargetLowering *TLI,
         fitsInAddressMode(SME->getOperand(0), AccessTy, TLI, false) &&
         SME->getNumOperands() == 2 && SME->isLoopInvariant(L)) {
 
-      const SCEV* SubImm = SE->getIntegerSCEV(0, Val->getType());
-      const SCEV* NewOp = SME->getOperand(1);
+      const SCEV *SubImm = SE->getIntegerSCEV(0, Val->getType());
+      const SCEV *NewOp = SME->getOperand(1);
       MoveImmediateValues(TLI, AccessTy, NewOp, SubImm, isAddress, L, SE);
       
       // If we extracted something out of the subexpressions, see if we can 
@@ -687,7 +689,7 @@ static void MoveImmediateValues(const TargetLowering *TLI,
 
 static void MoveImmediateValues(const TargetLowering *TLI,
                                 Instruction *User,
-                                const SCEV* &Val, const SCEV* &Imm,
+                                const SCEV *&Val, const SCEV *&Imm,
                                 bool isAddress, Loop *L,
                                 ScalarEvolution *SE) {
   const Type *AccessTy = getAccessType(User);
@@ -697,19 +699,19 @@ static void MoveImmediateValues(const TargetLowering *TLI,
 /// SeparateSubExprs - Decompose Expr into all of the subexpressions that are
 /// added together.  This is used to reassociate common addition subexprs
 /// together for maximal sharing when rewriting bases.
-static void SeparateSubExprs(SmallVector<const SCEV*, 16> &SubExprs,
-                             const SCEV* Expr,
+static void SeparateSubExprs(SmallVector<const SCEV *, 16> &SubExprs,
+                             const SCEV *Expr,
                              ScalarEvolution *SE) {
   if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(Expr)) {
     for (unsigned j = 0, e = AE->getNumOperands(); j != e; ++j)
       SeparateSubExprs(SubExprs, AE->getOperand(j), SE);
   } else if (const SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Expr)) {
-    const SCEV* Zero = SE->getIntegerSCEV(0, Expr->getType());
+    const SCEV *Zero = SE->getIntegerSCEV(0, Expr->getType());
     if (SARE->getOperand(0) == Zero) {
       SubExprs.push_back(Expr);
     } else {
       // Compute the addrec with zero as its base.
-      SmallVector<const SCEV*, 4> Ops(SARE->op_begin(), SARE->op_end());
+      SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end());
       Ops[0] = Zero;   // Start with zero base.
       SubExprs.push_back(SE->getAddRecExpr(Ops, SARE->getLoop()));
       
@@ -733,7 +735,7 @@ struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; };
 /// not remove anything.  This looks for things like (a+b+c) and
 /// (a+c+d) and computes the common (a+c) subexpression.  The common expression
 /// is *removed* from the Bases and returned.
-static const SCEV* 
+static const SCEV *
 RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
                                     ScalarEvolution *SE, Loop *L,
                                     const TargetLowering *TLI) {
@@ -741,9 +743,9 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
 
   // Only one use?  This is a very common case, so we handle it specially and
   // cheaply.
-  const SCEV* Zero = SE->getIntegerSCEV(0, Uses[0].Base->getType());
-  const SCEV* Result = Zero;
-  const SCEV* FreeResult = Zero;
+  const SCEV *Zero = SE->getIntegerSCEV(0, Uses[0].Base->getType());
+  const SCEV *Result = Zero;
+  const SCEV *FreeResult = Zero;
   if (NumUses == 1) {
     // If the use is inside the loop, use its base, regardless of what it is:
     // it is clearly shared across all the IV's.  If the use is outside the loop
@@ -759,13 +761,13 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
   // Also track whether all uses of each expression can be moved into an
   // an addressing mode "for free"; such expressions are left within the loop.
   // struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; };
-  std::map<const SCEV*, SubExprUseData> SubExpressionUseData;
+  std::map<const SCEV *, SubExprUseData> SubExpressionUseData;
   
   // UniqueSubExprs - Keep track of all of the subexpressions we see in the
   // order we see them.
-  SmallVector<const SCEV*, 16> UniqueSubExprs;
+  SmallVector<const SCEV *, 16> UniqueSubExprs;
 
-  SmallVector<const SCEV*, 16> SubExprs;
+  SmallVector<const SCEV *, 16> SubExprs;
   unsigned NumUsesInsideLoop = 0;
   for (unsigned i = 0; i != NumUses; ++i) {
     // If the user is outside the loop, just ignore it for base computation.
@@ -809,7 +811,7 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
   // Now that we know how many times each is used, build Result.  Iterate over
   // UniqueSubexprs so that we have a stable ordering.
   for (unsigned i = 0, e = UniqueSubExprs.size(); i != e; ++i) {
-    std::map<const SCEV*, SubExprUseData>::iterator I = 
+    std::map<const SCEV *, SubExprUseData>::iterator I = 
        SubExpressionUseData.find(UniqueSubExprs[i]);
     assert(I != SubExpressionUseData.end() && "Entry not found?");
     if (I->second.Count == NumUsesInsideLoop) { // Found CSE! 
@@ -853,7 +855,7 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
   if (FreeResult != Zero) {
     SeparateSubExprs(SubExprs, FreeResult, SE);
     for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) {
-      std::map<const SCEV*, SubExprUseData>::iterator I = 
+      std::map<const SCEV *, SubExprUseData>::iterator I = 
          SubExpressionUseData.find(SubExprs[j]);
       SubExpressionUseData.erase(I);
     }
@@ -902,7 +904,8 @@ bool LoopStrengthReduce::ValidScale(bool HasBaseReg, int64_t Scale,
 
   for (unsigned i = 0, e = UsersToProcess.size(); i!=e; ++i) {
     // If this is a load or other access, pass the type of the access in.
-    const Type *AccessTy = Type::VoidTy;
+    const Type *AccessTy =
+        Type::getVoidTy(UsersToProcess[i].Inst->getContext());
     if (isAddressUse(UsersToProcess[i].Inst,
                      UsersToProcess[i].OperandValToReplace))
       AccessTy = getAccessType(UsersToProcess[i].Inst);
@@ -934,7 +937,8 @@ bool LoopStrengthReduce::ValidOffset(bool HasBaseReg,
 
   for (unsigned i=0, e = UsersToProcess.size(); i!=e; ++i) {
     // If this is a load or other access, pass the type of the access in.
-    const Type *AccessTy = Type::VoidTy;
+    const Type *AccessTy =
+        Type::getVoidTy(UsersToProcess[i].Inst->getContext());
     if (isAddressUse(UsersToProcess[i].Inst,
                      UsersToProcess[i].OperandValToReplace))
       AccessTy = getAccessType(UsersToProcess[i].Inst);
@@ -982,10 +986,10 @@ bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty1,
 /// be folded into the addressing mode, nor even that the factor be constant; 
 /// a multiply (executed once) outside the loop is better than another IV 
 /// within.  Well, usually.
-const SCEV* LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
+const SCEV *LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
                                 bool AllUsesAreAddresses,
                                 bool AllUsesAreOutsideLoop,
-                                const SCEV* const &Stride, 
+                                const SCEV *const &Stride, 
                                 IVExpr &IV, const Type *Ty,
                                 const std::vector<BasedUser>& UsersToProcess) {
   if (StrideNoReuse.count(Stride))
@@ -995,7 +999,7 @@ const SCEV* LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
     int64_t SInt = SC->getValue()->getSExtValue();
     for (unsigned NewStride = 0, e = IU->StrideOrder.size();
          NewStride != e; ++NewStride) {
-      std::map<const SCEV*, IVsOfOneStride>::iterator SI = 
+      std::map<const SCEV *, IVsOfOneStride>::iterator SI = 
                 IVsByStride.find(IU->StrideOrder[NewStride]);
       if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first) ||
           StrideNoReuse.count(SI->first))
@@ -1048,7 +1052,7 @@ const SCEV* LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
     // an existing IV if we can.
     for (unsigned NewStride = 0, e = IU->StrideOrder.size();
          NewStride != e; ++NewStride) {
-      std::map<const SCEV*, IVsOfOneStride>::iterator SI = 
+      std::map<const SCEV *, IVsOfOneStride>::iterator SI = 
                 IVsByStride.find(IU->StrideOrder[NewStride]);
       if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first))
         continue;
@@ -1068,7 +1072,7 @@ const SCEV* LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
     // -1*old.
     for (unsigned NewStride = 0, e = IU->StrideOrder.size();
          NewStride != e; ++NewStride) {
-      std::map<const SCEV*, IVsOfOneStride>::iterator SI = 
+      std::map<const SCEV *, IVsOfOneStride>::iterator SI = 
                 IVsByStride.find(IU->StrideOrder[NewStride]);
       if (SI == IVsByStride.end()) 
         continue;
@@ -1097,7 +1101,7 @@ static bool PartitionByIsUseOfPostIncrementedValue(const BasedUser &Val) {
 
 /// isNonConstantNegative - Return true if the specified scev is negated, but
 /// not a constant.
-static bool isNonConstantNegative(const SCEV* const &Expr) {
+static bool isNonConstantNegative(const SCEV *const &Expr) {
   const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Expr);
   if (!Mul) return false;
   
@@ -1114,7 +1118,7 @@ static bool isNonConstantNegative(const SCEV* const &Expr) {
 /// of the strided accesses, as well as the old information from Uses. We
 /// progressively move information from the Base field to the Imm field, until
 /// we eventually have the full access expression to rewrite the use.
-const SCEV* LoopStrengthReduce::CollectIVUsers(const SCEV* const &Stride,
+const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *const &Stride,
                                               IVUsersOfOneStride &Uses,
                                               Loop *L,
                                               bool &AllUsesAreAddresses,
@@ -1145,7 +1149,7 @@ const SCEV* LoopStrengthReduce::CollectIVUsers(const SCEV* const &Stride,
   // for the strides (e.g. if we have "A+C+B" and "A+B+D" as our bases, find
   // "A+B"), emit it to the preheader, then remove the expression from the
   // UsersToProcess base values.
-  const SCEV* CommonExprs =
+  const SCEV *CommonExprs =
     RemoveCommonExpressionsFromUseBases(UsersToProcess, SE, L, TLI);
 
   // Next, figure out what we can represent in the immediate fields of
@@ -1211,7 +1215,7 @@ bool LoopStrengthReduce::ShouldUseFullStrengthReductionMode(
                                    const std::vector<BasedUser> &UsersToProcess,
                                    const Loop *L,
                                    bool AllUsesAreAddresses,
-                                   const SCEV* Stride) {
+                                   const SCEV *Stride) {
   if (!EnableFullLSRMode)
     return false;
 
@@ -1248,7 +1252,7 @@ bool LoopStrengthReduce::ShouldUseFullStrengthReductionMode(
         if (!Imm)       Imm = SE->getIntegerSCEV(0, Stride->getType());
         const Instruction *Inst = UsersToProcess[i].Inst;
         const Type *AccessTy = getAccessType(Inst);
-        const SCEV* Diff = SE->getMinusSCEV(UsersToProcess[i].Imm, Imm);
+        const SCEV *Diff = SE->getMinusSCEV(UsersToProcess[i].Imm, Imm);
         if (!Diff->isZero() &&
             (!AllUsesAreAddresses ||
              !fitsInAddressMode(Diff, AccessTy, TLI, /*HasBaseReg=*/true)))
@@ -1282,7 +1286,7 @@ bool LoopStrengthReduce::ShouldUseFullStrengthReductionMode(
 ///
 /// Return the created phi node.
 ///
-static PHINode *InsertAffinePhi(const SCEV* Start, const SCEV* Step,
+static PHINode *InsertAffinePhi(const SCEV *Start, const SCEV *Step,
                                 Instruction *IVIncInsertPt,
                                 const Loop *L,
                                 SCEVExpander &Rewriter) {
@@ -1302,7 +1306,7 @@ static PHINode *InsertAffinePhi(const SCEV* Start, const SCEV* Step,
   // If the stride is negative, insert a sub instead of an add for the
   // increment.
   bool isNegative = isNonConstantNegative(Step);
-  const SCEV* IncAmount = Step;
+  const SCEV *IncAmount = Step;
   if (isNegative)
     IncAmount = Rewriter.SE.getNegativeSCEV(Step);
 
@@ -1341,13 +1345,13 @@ static void SortUsersToProcess(std::vector<BasedUser> &UsersToProcess) {
   // loop before users outside of the loop with a particular base.
   //
   // We would like to use stable_sort here, but we can't.  The problem is that
-  // const SCEV*'s don't have a deterministic ordering w.r.t to each other, so
+  // const SCEV *'s don't have a deterministic ordering w.r.t to each other, so
   // we don't have anything to do a '<' comparison on.  Because we think the
   // number of uses is small, do a horrible bubble sort which just relies on
   // ==.
   for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) {
     // Get a base value.
-    const SCEV* Base = UsersToProcess[i].Base;
+    const SCEV *Base = UsersToProcess[i].Base;
 
     // Compact everything with this base to be consecutive with this one.
     for (unsigned j = i+1; j != e; ++j) {
@@ -1366,11 +1370,11 @@ static void SortUsersToProcess(std::vector<BasedUser> &UsersToProcess) {
 void
 LoopStrengthReduce::PrepareToStrengthReduceFully(
                                         std::vector<BasedUser> &UsersToProcess,
-                                        const SCEV* Stride,
-                                        const SCEV* CommonExprs,
+                                        const SCEV *Stride,
+                                        const SCEV *CommonExprs,
                                         const Loop *L,
                                         SCEVExpander &PreheaderRewriter) {
-  DOUT << "  Fully reducing all users\n";
+  DEBUG(errs() << "  Fully reducing all users\n");
 
   // Rewrite the UsersToProcess records, creating a separate PHI for each
   // unique Base value.
@@ -1379,9 +1383,9 @@ LoopStrengthReduce::PrepareToStrengthReduceFully(
     // TODO: The uses are grouped by base, but not sorted. We arbitrarily
     // pick the first Imm value here to start with, and adjust it for the
     // other uses.
-    const SCEV* Imm = UsersToProcess[i].Imm;
-    const SCEV* Base = UsersToProcess[i].Base;
-    const SCEV* Start = SE->getAddExpr(CommonExprs, Base, Imm);
+    const SCEV *Imm = UsersToProcess[i].Imm;
+    const SCEV *Base = UsersToProcess[i].Base;
+    const SCEV *Start = SE->getAddExpr(CommonExprs, Base, Imm);
     PHINode *Phi = InsertAffinePhi(Start, Stride, IVIncInsertPt, L,
                                    PreheaderRewriter);
     // Loop over all the users with the same base.
@@ -1413,13 +1417,13 @@ static Instruction *FindIVIncInsertPt(std::vector<BasedUser> &UsersToProcess,
 void
 LoopStrengthReduce::PrepareToStrengthReduceWithNewPhi(
                                          std::vector<BasedUser> &UsersToProcess,
-                                         const SCEV* Stride,
-                                         const SCEV* CommonExprs,
+                                         const SCEV *Stride,
+                                         const SCEV *CommonExprs,
                                          Value *CommonBaseV,
                                          Instruction *IVIncInsertPt,
                                          const Loop *L,
                                          SCEVExpander &PreheaderRewriter) {
-  DOUT << "  Inserting new PHI:\n";
+  DEBUG(errs() << "  Inserting new PHI:\n");
 
   PHINode *Phi = InsertAffinePhi(SE->getUnknown(CommonBaseV),
                                  Stride, IVIncInsertPt, L,
@@ -1432,9 +1436,9 @@ LoopStrengthReduce::PrepareToStrengthReduceWithNewPhi(
   for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i)
     UsersToProcess[i].Phi = Phi;
 
-  DOUT << "    IV=";
-  DEBUG(WriteAsOperand(*DOUT, Phi, /*PrintType=*/false));
-  DOUT << "\n";
+  DEBUG(errs() << "    IV=");
+  DEBUG(WriteAsOperand(errs(), Phi, /*PrintType=*/false));
+  DEBUG(errs() << "\n");
 }
 
 /// PrepareToStrengthReduceFromSmallerStride - Prepare for the given users to
@@ -1447,8 +1451,8 @@ LoopStrengthReduce::PrepareToStrengthReduceFromSmallerStride(
                                          Value *CommonBaseV,
                                          const IVExpr &ReuseIV,
                                          Instruction *PreInsertPt) {
-  DOUT << "  Rewriting in terms of existing IV of STRIDE " << *ReuseIV.Stride
-       << " and BASE " << *ReuseIV.Base << "\n";
+  DEBUG(errs() << "  Rewriting in terms of existing IV of STRIDE "
+               << *ReuseIV.Stride << " and BASE " << *ReuseIV.Base << "\n");
 
   // All the users will share the reused IV.
   for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i)
@@ -1490,7 +1494,7 @@ static bool IsImmFoldedIntoAddrMode(GlobalValue *GV, int64_t Offset,
 /// StrengthReduceStridedIVUsers - Strength reduce all of the users of a single
 /// stride of IV.  All of the users may have different starting values, and this
 /// may not be the only stride.
-void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride,
+void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride,
                                                       IVUsersOfOneStride &Uses,
                                                       Loop *L) {
   // If all the users are moved to another stride, then there is nothing to do.
@@ -1513,7 +1517,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride,
   // move information from the Base field to the Imm field, until we eventually
   // have the full access expression to rewrite the use.
   std::vector<BasedUser> UsersToProcess;
-  const SCEV* CommonExprs = CollectIVUsers(Stride, Uses, L, AllUsesAreAddresses,
+  const SCEV *CommonExprs = CollectIVUsers(Stride, Uses, L, AllUsesAreAddresses,
                                           AllUsesAreOutsideLoop,
                                           UsersToProcess);
 
@@ -1531,9 +1535,11 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride,
   // If all uses are addresses, consider sinking the immediate part of the
   // common expression back into uses if they can fit in the immediate fields.
   if (TLI && HaveCommonExprs && AllUsesAreAddresses) {
-    const SCEV* NewCommon = CommonExprs;
-    const SCEV* Imm = SE->getIntegerSCEV(0, ReplacedTy);
-    MoveImmediateValues(TLI, Type::VoidTy, NewCommon, Imm, true, L, SE);
+    const SCEV *NewCommon = CommonExprs;
+    const SCEV *Imm = SE->getIntegerSCEV(0, ReplacedTy);
+    MoveImmediateValues(TLI, Type::getVoidTy(
+                        L->getLoopPreheader()->getContext()),
+                        NewCommon, Imm, true, L, SE);
     if (!Imm->isZero()) {
       bool DoSink = true;
 
@@ -1548,11 +1554,12 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride,
       if (GV || Offset)
         // Pass VoidTy as the AccessTy to be conservative, because
         // there could be multiple access types among all the uses.
-        DoSink = IsImmFoldedIntoAddrMode(GV, Offset, Type::VoidTy,
+        DoSink = IsImmFoldedIntoAddrMode(GV, Offset,
+                          Type::getVoidTy(L->getLoopPreheader()->getContext()),
                                          UsersToProcess, TLI);
 
       if (DoSink) {
-        DOUT << "  Sinking " << *Imm << " back down into uses\n";
+        DEBUG(errs() << "  Sinking " << *Imm << " back down into uses\n");
         for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i)
           UsersToProcess[i].Imm = SE->getAddExpr(UsersToProcess[i].Imm, Imm);
         CommonExprs = NewCommon;
@@ -1564,9 +1571,9 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride,
 
   // Now that we know what we need to do, insert the PHI node itself.
   //
-  DOUT << "LSR: Examining IVs of TYPE " << *ReplacedTy << " of STRIDE "
-       << *Stride << ":\n"
-       << "  Common base: " << *CommonExprs << "\n";
+  DEBUG(errs() << "LSR: Examining IVs of TYPE " << *ReplacedTy << " of STRIDE "
+               << *Stride << ":\n"
+               << "  Common base: " << *CommonExprs << "\n");
 
   SCEVExpander Rewriter(*SE);
   SCEVExpander PreheaderRewriter(*SE);
@@ -1576,11 +1583,13 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride,
   BasicBlock *LatchBlock = L->getLoopLatch();
   Instruction *IVIncInsertPt = LatchBlock->getTerminator();
 
-  Value *CommonBaseV = Context->getNullValue(ReplacedTy);
+  Value *CommonBaseV = Constant::getNullValue(ReplacedTy);
 
-  const SCEV* RewriteFactor = SE->getIntegerSCEV(0, ReplacedTy);
-  IVExpr   ReuseIV(SE->getIntegerSCEV(0, Type::Int32Ty),
-                   SE->getIntegerSCEV(0, Type::Int32Ty),
+  const SCEV *RewriteFactor = SE->getIntegerSCEV(0, ReplacedTy);
+  IVExpr   ReuseIV(SE->getIntegerSCEV(0,
+                                    Type::getInt32Ty(Preheader->getContext())),
+                   SE->getIntegerSCEV(0, 
+                                    Type::getInt32Ty(Preheader->getContext())),
                    0);
 
   /// Choose a strength-reduction strategy and prepare for it by creating
@@ -1618,7 +1627,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride,
   // strength-reduced forms.  This outer loop handles all bases, the inner
   // loop handles all users of a particular base.
   while (!UsersToProcess.empty()) {
-    const SCEV* Base = UsersToProcess.back().Base;
+    const SCEV *Base = UsersToProcess.back().Base;
     Instruction *Inst = UsersToProcess.back().Inst;
 
     // Emit the code for Base into the preheader.
@@ -1626,17 +1635,17 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride,
     if (!Base->isZero()) {
       BaseV = PreheaderRewriter.expandCodeFor(Base, 0, PreInsertPt);
 
-      DOUT << "  INSERTING code for BASE = " << *Base << ":";
+      DEBUG(errs() << "  INSERTING code for BASE = " << *Base << ":");
       if (BaseV->hasName())
-        DOUT << " Result value name = %" << BaseV->getNameStr();
-      DOUT << "\n";
+        DEBUG(errs() << " Result value name = %" << BaseV->getName());
+      DEBUG(errs() << "\n");
 
       // If BaseV is a non-zero constant, make sure that it gets inserted into
       // the preheader, instead of being forward substituted into the uses.  We
       // do this by forcing a BitCast (noop cast) to be inserted into the
       // preheader in this case.
       if (!fitsInAddressMode(Base, getAccessType(Inst), TLI, false) &&
-          !isa<Instruction>(BaseV)) {
+          isa<Constant>(BaseV)) {
         // We want this constant emitted into the preheader! This is just
         // using cast as a copy so BitCast (no-op cast) is appropriate
         BaseV = new BitCastInst(BaseV, BaseV->getType(), "preheaderinsert",
@@ -1650,15 +1659,15 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride,
       // FIXME: Use emitted users to emit other users.
       BasedUser &User = UsersToProcess.back();
 
-      DOUT << "    Examining ";
+      DEBUG(errs() << "    Examining ");
       if (User.isUseOfPostIncrementedValue)
-        DOUT << "postinc";
+        DEBUG(errs() << "postinc");
       else
-        DOUT << "preinc";
-      DOUT << " use ";
-      DEBUG(WriteAsOperand(*DOUT, UsersToProcess.back().OperandValToReplace,
+        DEBUG(errs() << "preinc");
+      DEBUG(errs() << " use ");
+      DEBUG(WriteAsOperand(errs(), UsersToProcess.back().OperandValToReplace,
                            /*PrintType=*/false));
-      DOUT << " in Inst: " << *(User.Inst);
+      DEBUG(errs() << " in Inst: " << *User.Inst);
 
       // If this instruction wants to use the post-incremented value, move it
       // after the post-inc and use its value instead of the PHI.
@@ -1673,7 +1682,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride,
           User.Inst->moveBefore(IVIncInsertPt);
       }
 
-      const SCEV* RewriteExpr = SE->getUnknown(RewriteOp);
+      const SCEV *RewriteExpr = SE->getUnknown(RewriteOp);
 
       if (SE->getEffectiveSCEVType(RewriteOp->getType()) !=
           SE->getEffectiveSCEVType(ReplacedTy)) {
@@ -1705,7 +1714,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride,
         // The base has been used to initialize the PHI node but we don't want
         // it here.
         if (!ReuseIV.Base->isZero()) {
-          const SCEV* typedBase = ReuseIV.Base;
+          const SCEV *typedBase = ReuseIV.Base;
           if (SE->getEffectiveSCEVType(RewriteExpr->getType()) !=
               SE->getEffectiveSCEVType(ReuseIV.Base->getType())) {
             // It's possible the original IV is a larger type than the new IV,
@@ -1770,10 +1779,10 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride,
 /// set the IV user and stride information and return true, otherwise return
 /// false.
 bool LoopStrengthReduce::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse,
-                                       const SCEV* const * &CondStride) {
+                                       const SCEV *const * &CondStride) {
   for (unsigned Stride = 0, e = IU->StrideOrder.size();
        Stride != e && !CondUse; ++Stride) {
-    std::map<const SCEV*, IVUsersOfOneStride *>::iterator SI =
+    std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
       IU->IVUsesByStride.find(IU->StrideOrder[Stride]);
     assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
 
@@ -1800,7 +1809,7 @@ namespace {
     const ScalarEvolution *SE;
     explicit StrideCompare(const ScalarEvolution *se) : SE(se) {}
 
-    bool operator()(const SCEV* const &LHS, const SCEV* const &RHS) {
+    bool operator()(const SCEV *const &LHS, const SCEV *const &RHS) {
       const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS);
       const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS);
       if (LHSC && RHSC) {
@@ -1843,14 +1852,14 @@ namespace {
 /// if (v1 < 30) goto loop
 ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
                                                 IVStrideUse* &CondUse,
-                                              const SCEV* const* &CondStride) {
+                                              const SCEV *const* &CondStride) {
   // If there's only one stride in the loop, there's nothing to do here.
   if (IU->StrideOrder.size() < 2)
     return Cond;
   // If there are other users of the condition's stride, don't bother
   // trying to change the condition because the stride will still
   // remain.
-  std::map<const SCEV*, IVUsersOfOneStride *>::iterator I =
+  std::map<const SCEV *, IVUsersOfOneStride *>::iterator I =
     IU->IVUsesByStride.find(*CondStride);
   if (I == IU->IVUsesByStride.end() ||
       I->second->Users.size() != 1)
@@ -1867,11 +1876,11 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
   const Type *NewCmpTy = NULL;
   unsigned TyBits = SE->getTypeSizeInBits(CmpTy);
   unsigned NewTyBits = 0;
-  const SCEV* *NewStride = NULL;
+  const SCEV **NewStride = NULL;
   Value *NewCmpLHS = NULL;
   Value *NewCmpRHS = NULL;
   int64_t Scale = 1;
-  const SCEV* NewOffset = SE->getIntegerSCEV(0, CmpTy);
+  const SCEV *NewOffset = SE->getIntegerSCEV(0, CmpTy);
 
   if (ConstantInt *C = dyn_cast<ConstantInt>(Cond->getOperand(1))) {
     int64_t CmpVal = C->getValue().getSExtValue();
@@ -1883,7 +1892,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
 
     // Look for a suitable stride / iv as replacement.
     for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
-      std::map<const SCEV*, IVUsersOfOneStride *>::iterator SI =
+      std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
         IU->IVUsesByStride.find(IU->StrideOrder[i]);
       if (!isa<SCEVConstant>(SI->first))
         continue;
@@ -1942,7 +1951,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
 
       NewCmpTy = NewCmpLHS->getType();
       NewTyBits = SE->getTypeSizeInBits(NewCmpTy);
-      const Type *NewCmpIntTy = Context->getIntegerType(NewTyBits);
+      const Type *NewCmpIntTy = IntegerType::get(Cond->getContext(), NewTyBits);
       if (RequiresTypeConversion(NewCmpTy, CmpTy)) {
         // Check if it is possible to rewrite it using
         // an iv / stride of a smaller integer type.
@@ -1963,7 +1972,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
       bool AllUsesAreAddresses = true;
       bool AllUsesAreOutsideLoop = true;
       std::vector<BasedUser> UsersToProcess;
-      const SCEV* CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
+      const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
                                               AllUsesAreAddresses,
                                               AllUsesAreOutsideLoop,
                                               UsersToProcess);
@@ -1987,10 +1996,10 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
 
       NewStride = &IU->StrideOrder[i];
       if (!isa<PointerType>(NewCmpTy))
-        NewCmpRHS = Context->getConstantInt(NewCmpTy, NewCmpVal);
+        NewCmpRHS = ConstantInt::get(NewCmpTy, NewCmpVal);
       else {
-        Constant *CI = Context->getConstantInt(NewCmpIntTy, NewCmpVal);
-        NewCmpRHS = Context->getConstantExprIntToPtr(CI, NewCmpTy);
+        Constant *CI = ConstantInt::get(NewCmpIntTy, NewCmpVal);
+        NewCmpRHS = ConstantExpr::getIntToPtr(CI, NewCmpTy);
       }
       NewOffset = TyBits == NewTyBits
         ? SE->getMulExpr(CondUse->getOffset(),
@@ -2019,9 +2028,8 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
     // Create a new compare instruction using new stride / iv.
     ICmpInst *OldCond = Cond;
     // Insert new compare instruction.
-    Cond = new ICmpInst(Predicate, NewCmpLHS, NewCmpRHS,
-                        L->getHeader()->getName() + ".termcond",
-                        OldCond);
+    Cond = new ICmpInst(OldCond, Predicate, NewCmpLHS, NewCmpRHS,
+                        L->getHeader()->getName() + ".termcond");
 
     // Remove the old compare instruction. The old indvar is probably dead too.
     DeadInsts.push_back(CondUse->getOperandValToReplace());
@@ -2098,13 +2106,13 @@ ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond,
   SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1));
   if (!Sel || !Sel->hasOneUse()) return Cond;
 
-  const SCEV* BackedgeTakenCount = SE->getBackedgeTakenCount(L);
+  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
   if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
     return Cond;
-  const SCEV* One = SE->getIntegerSCEV(1, BackedgeTakenCount->getType());
+  const SCEV *One = SE->getIntegerSCEV(1, BackedgeTakenCount->getType());
 
   // Add one to the backedge-taken count to get the trip count.
-  const SCEV* IterationCount = SE->getAddExpr(BackedgeTakenCount, One);
+  const SCEV *IterationCount = SE->getAddExpr(BackedgeTakenCount, One);
 
   // Check for a max calculation that matches the pattern.
   if (!isa<SCEVSMaxExpr>(IterationCount) && !isa<SCEVUMaxExpr>(IterationCount))
@@ -2117,13 +2125,13 @@ ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond,
   if (Max->getNumOperands() != 2)
     return Cond;
 
-  const SCEV* MaxLHS = Max->getOperand(0);
-  const SCEV* MaxRHS = Max->getOperand(1);
+  const SCEV *MaxLHS = Max->getOperand(0);
+  const SCEV *MaxRHS = Max->getOperand(1);
   if (!MaxLHS || MaxLHS != One) return Cond;
 
   // Check the relevant induction variable for conformance to
   // the pattern.
-  const SCEV* IV = SE->getSCEV(Cond->getOperand(0));
+  const SCEV *IV = SE->getSCEV(Cond->getOperand(0));
   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
   if (!AR || !AR->isAffine() ||
       AR->getStart() != One ||
@@ -2152,7 +2160,7 @@ ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond,
   // Ok, everything looks ok to change the condition into an SLT or SGE and
   // delete the max calculation.
   ICmpInst *NewCond =
-    new ICmpInst(Pred, Cond->getOperand(0), NewRHS, "scmp", Cond);
+    new ICmpInst(Cond, Pred, Cond->getOperand(0), NewRHS, "scmp");
 
   // Delete the max calculation instructions.
   Cond->replaceAllUsesWith(NewCond);
@@ -2169,13 +2177,13 @@ ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond,
 /// inside the loop then try to eliminate the cast opeation.
 void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
 
-  const SCEV* BackedgeTakenCount = SE->getBackedgeTakenCount(L);
+  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
   if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
     return;
-
+    
   for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e;
        ++Stride) {
-    std::map<const SCEV*, IVUsersOfOneStride *>::iterator SI =
+    std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
       IU->IVUsesByStride.find(IU->StrideOrder[Stride]);
     assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
     if (!isa<SCEVConstant>(SI->first))
@@ -2209,7 +2217,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
       if (TLI) {
         // If target does not support DestTy natively then do not apply
         // this transformation.
-        MVT DVT = TLI->getValueType(DestTy);
+        EVT DVT = TLI->getValueType(DestTy);
         if (!TLI->isTypeLegal(DVT)) continue;
       }
 
@@ -2234,7 +2242,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
         
       ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
       if (!Init) continue;
-      Constant *NewInit = Context->getConstantFP(DestTy, Init->getZExtValue());
+      Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue());
 
       BinaryOperator *Incr = 
         dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
@@ -2258,7 +2266,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
       PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH);
 
       /* create new increment. '++d' in above example. */
-      Constant *CFP = Context->getConstantFP(DestTy, C->getZExtValue());
+      Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
       BinaryOperator *NewIncr = 
         BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ?
                                  Instruction::FAdd : Instruction::FSub,
@@ -2294,6 +2302,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
   // one register value.
   BasicBlock *LatchBlock = L->getLoopLatch();
   BasicBlock *ExitingBlock = L->getExitingBlock();
+  
   if (!ExitingBlock)
     // Multiple exits, just look at the exit in the latch block if there is one.
     ExitingBlock = LatchBlock;
@@ -2305,7 +2314,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
 
   // Search IVUsesByStride to find Cond's IVUse if there is one.
   IVStrideUse *CondUse = 0;
-  const SCEV* const *CondStride = 0;
+  const SCEV *const *CondStride = 0;
   ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
   if (!FindIVUserForCond(Cond, CondUse, CondStride))
     return; // setcc doesn't use the IV.
@@ -2335,7 +2344,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
       int64_t SInt = SC->getValue()->getSExtValue();
       for (unsigned NewStride = 0, ee = IU->StrideOrder.size(); NewStride != ee;
            ++NewStride) {
-        std::map<const SCEV*, IVUsersOfOneStride *>::iterator SI =
+        std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
           IU->IVUsesByStride.find(IU->StrideOrder[NewStride]);
         if (!isa<SCEVConstant>(SI->first) || SI->first == *CondStride)
           continue;
@@ -2349,7 +2358,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
         bool AllUsesAreAddresses = true;
         bool AllUsesAreOutsideLoop = true;
         std::vector<BasedUser> UsersToProcess;
-        const SCEV* CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
+        const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
                                                 AllUsesAreAddresses,
                                                 AllUsesAreOutsideLoop,
                                                 UsersToProcess);
@@ -2410,7 +2419,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
 void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) {
 
   // If the number of times the loop is executed isn't computable, give up.
-  const SCEV* BackedgeTakenCount = SE->getBackedgeTakenCount(L);
+  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
   if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
     return;
 
@@ -2439,9 +2448,9 @@ void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) {
   // Handle only tests for equality for the moment, and only stride 1.
   if (Cond->getPredicate() != CmpInst::ICMP_EQ)
     return;
-  const SCEV* IV = SE->getSCEV(Cond->getOperand(0));
+  const SCEV *IV = SE->getSCEV(Cond->getOperand(0));
   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
-  const SCEV* One = SE->getIntegerSCEV(1, BackedgeTakenCount->getType());
+  const SCEV *One = SE->getIntegerSCEV(1, BackedgeTakenCount->getType());
   if (!AR || !AR->isAffine() || AR->getStepRecurrence(*SE) != One)
     return;
   // If the RHS of the comparison is defined inside the loop, the rewrite
@@ -2497,7 +2506,7 @@ void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) {
   Value *startVal = phi->getIncomingValue(inBlock);
   Value *endVal = Cond->getOperand(1);
   // FIXME check for case where both are constant
-  Constant* Zero = Context->getConstantInt(Cond->getOperand(1)->getType(), 0);
+  Constant* Zero = ConstantInt::get(Cond->getOperand(1)->getType(), 0);
   BinaryOperator *NewStartVal = 
     BinaryOperator::Create(Instruction::Sub, endVal, startVal,
                            "tmp", PreInsertPt);
@@ -2516,11 +2525,9 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
   Changed = false;
 
   if (!IU->IVUsesByStride.empty()) {
-#ifndef NDEBUG
-    DOUT << "\nLSR on \"" << L->getHeader()->getParent()->getNameStart()
-         << "\" ";
-    DEBUG(L->dump());
-#endif
+    DEBUG(errs() << "\nLSR on \"" << L->getHeader()->getParent()->getName()
+          << "\" ";
+          L->dump());
 
     // Sort the StrideOrder so we process larger strides first.
     std::stable_sort(IU->StrideOrder.begin(), IU->StrideOrder.end(),
@@ -2557,7 +2564,7 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
     // strides deterministic - not dependent on map order.
     for (unsigned Stride = 0, e = IU->StrideOrder.size();
          Stride != e; ++Stride) {
-      std::map<const SCEV*, IVUsersOfOneStride *>::iterator SI =
+      std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
         IU->IVUsesByStride.find(IU->StrideOrder[Stride]);
       assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
       // FIXME: Generalize to non-affine IV's.
diff --git a/lib/Transforms/Scalar/LoopUnroll.cpp b/lib/Transforms/Scalar/LoopUnroll.cpp
index 23757cd..837ec59 100644
--- a/lib/Transforms/Scalar/LoopUnroll.cpp
+++ b/lib/Transforms/Scalar/LoopUnroll.cpp
@@ -17,9 +17,9 @@
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopPass.h"
-#include "llvm/Support/Compiler.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/UnrollLoop.h"
 #include <climits>
 
@@ -39,7 +39,7 @@ UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden,
            "-unroll-threshold loop size is reached."));
 
 namespace {
-  class VISIBILITY_HIDDEN LoopUnroll : public LoopPass {
+  class LoopUnroll : public LoopPass {
   public:
     static char ID; // Pass ID, replacement for typeid
     LoopUnroll() : LoopPass(&ID) {}
@@ -96,10 +96,7 @@ static unsigned ApproximateLoopSize(const Loop *L) {
         // is higher than other instructions. Here 3 and 10 are magic
         // numbers that help one isolated test case from PR2067 without
         // negatively impacting measured benchmarks.
-        if (isa<IntrinsicInst>(I))
-          Size = Size + 3;
-        else
-          Size = Size + 10;
+        Size += isa<IntrinsicInst>(I) ? 3 : 10;
       } else {
         ++Size;
       }
@@ -118,51 +115,48 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
   LoopInfo *LI = &getAnalysis<LoopInfo>();
 
   BasicBlock *Header = L->getHeader();
-  DOUT << "Loop Unroll: F[" << Header->getParent()->getName()
-       << "] Loop %" << Header->getName() << "\n";
+  DEBUG(errs() << "Loop Unroll: F[" << Header->getParent()->getName()
+        << "] Loop %" << Header->getName() << "\n");
+  (void)Header;
 
   // Find trip count
   unsigned TripCount = L->getSmallConstantTripCount();
   unsigned Count = UnrollCount;
- 
+
   // Automatically select an unroll count.
   if (Count == 0) {
     // Conservative heuristic: if we know the trip count, see if we can
     // completely unroll (subject to the threshold, checked below); otherwise
-    // try to find greatest modulo of the trip count which is still under 
+    // try to find greatest modulo of the trip count which is still under
     // threshold value.
-    if (TripCount != 0) {
-      Count = TripCount;
-    } else {
+    if (TripCount == 0)
       return false;
-    }
+    Count = TripCount;
   }
 
   // Enforce the threshold.
   if (UnrollThreshold != NoThreshold) {
     unsigned LoopSize = ApproximateLoopSize(L);
-    DOUT << "  Loop Size = " << LoopSize << "\n";
+    DEBUG(errs() << "  Loop Size = " << LoopSize << "\n");
     uint64_t Size = (uint64_t)LoopSize*Count;
     if (TripCount != 1 && Size > UnrollThreshold) {
-      DOUT << "  Too large to fully unroll with count: " << Count
-           << " because size: " << Size << ">" << UnrollThreshold << "\n";
-      if (UnrollAllowPartial) {
-        // Reduce unroll count to be modulo of TripCount for partial unrolling
-        Count = UnrollThreshold / LoopSize;        
-        while (Count != 0 && TripCount%Count != 0) {
-          Count--;
-        }        
-        if (Count < 2) {
-          DOUT << "  could not unroll partially\n";
-          return false;
-        } else {
-          DOUT << "  partially unrolling with count: " << Count << "\n";
-        }
-      } else {
-        DOUT << "  will not try to unroll partially because "
-             << "-unroll-allow-partial not given\n";
+      DEBUG(errs() << "  Too large to fully unroll with count: " << Count
+            << " because size: " << Size << ">" << UnrollThreshold << "\n");
+      if (!UnrollAllowPartial) {
+        DEBUG(errs() << "  will not try to unroll partially because "
+              << "-unroll-allow-partial not given\n");
+        return false;
+      }
+      // Reduce unroll count to be modulo of TripCount for partial unrolling
+      Count = UnrollThreshold / LoopSize;
+      while (Count != 0 && TripCount%Count != 0) {
+        Count--;
+      }
+      if (Count < 2) {
+        DEBUG(errs() << "  could not unroll partially\n");
         return false;
       }
+      DEBUG(errs() << "  partially unrolling with count: " << Count << "\n");
     }
   }
 
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index de5eedf..f6de362 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -34,6 +34,7 @@
 #include "llvm/Instructions.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InlineCost.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/Dominators.h"
@@ -44,8 +45,8 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <set>
 using namespace llvm;
@@ -56,12 +57,14 @@ STATISTIC(NumSelects , "Number of selects unswitched");
 STATISTIC(NumTrivial , "Number of unswitches that are trivial");
 STATISTIC(NumSimplify, "Number of simplifications of unswitched code");
 
+// The specific value of 50 here was chosen based only on intuition and a
+// few specific examples.
 static cl::opt<unsigned>
 Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"),
-          cl::init(10), cl::Hidden);
+          cl::init(50), cl::Hidden);
   
 namespace {
-  class VISIBILITY_HIDDEN LoopUnswitch : public LoopPass {
+  class LoopUnswitch : public LoopPass {
     LoopInfo *LI;  // Loop information
     LPPassManager *LPM;
 
@@ -112,6 +115,10 @@ namespace {
 
   private:
 
+    virtual void releaseMemory() {
+      UnswitchedVals.clear();
+    }
+
     /// RemoveLoopFromWorklist - If the specified loop is on the loop worklist,
     /// remove it.
     void RemoveLoopFromWorklist(Loop *L) {
@@ -168,8 +175,10 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) {
   if (isa<Constant>(Cond)) return 0;
 
   // TODO: Handle: br (VARIANT|INVARIANT).
-  // TODO: Hoist simple expressions out of loops.
-  if (L->isLoopInvariant(Cond)) return Cond;
+
+  // Hoist simple values out.
+  if (L->makeLoopInvariant(Cond, Changed))
+    return Cond;
 
   if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Cond))
     if (BO->getOpcode() == Instruction::And ||
@@ -214,6 +223,7 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
 /// and profitable.
 bool LoopUnswitch::processCurrentLoop() {
   bool Changed = false;
+  LLVMContext &Context = currentLoop->getHeader()->getContext();
 
   // Loop over all of the basic blocks in the loop.  If we find an interior
   // block that is branching on a loop-invariant condition, we can unswitch this
@@ -231,7 +241,7 @@ bool LoopUnswitch::processCurrentLoop() {
         Value *LoopCond = FindLIVLoopCondition(BI->getCondition(), 
                                                currentLoop, Changed);
         if (LoopCond && UnswitchIfProfitable(LoopCond, 
-                                             Context->getConstantIntTrue())) {
+                                             ConstantInt::getTrue(Context))) {
           ++NumBranches;
           return true;
         }
@@ -261,7 +271,7 @@ bool LoopUnswitch::processCurrentLoop() {
         Value *LoopCond = FindLIVLoopCondition(SI->getCondition(), 
                                                currentLoop, Changed);
         if (LoopCond && UnswitchIfProfitable(LoopCond, 
-                                             Context->getConstantIntTrue())) {
+                                             ConstantInt::getTrue(Context))) {
           ++NumSelects;
           return true;
         }
@@ -335,6 +345,7 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
                                        BasicBlock **LoopExit) {
   BasicBlock *Header = currentLoop->getHeader();
   TerminatorInst *HeaderTerm = Header->getTerminator();
+  LLVMContext &Context = Header->getContext();
   
   BasicBlock *LoopExitBB = 0;
   if (BranchInst *BI = dyn_cast<BranchInst>(HeaderTerm)) {
@@ -349,10 +360,10 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
     // this.
     if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop, 
                                              BI->getSuccessor(0)))) {
-      if (Val) *Val = Context->getConstantIntTrue();
+      if (Val) *Val = ConstantInt::getTrue(Context);
     } else if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop, 
                                                     BI->getSuccessor(1)))) {
-      if (Val) *Val = Context->getConstantIntFalse();
+      if (Val) *Val = ConstantInt::getFalse(Context);
     }
   } else if (SwitchInst *SI = dyn_cast<SwitchInst>(HeaderTerm)) {
     // If this isn't a switch on Cond, we can't handle it.
@@ -398,29 +409,14 @@ unsigned LoopUnswitch::getLoopUnswitchCost(Value *LIC) {
   if (IsTrivialUnswitchCondition(LIC))
     return 0;
   
-  // FIXME: This is really overly conservative.  However, more liberal 
-  // estimations have thus far resulted in excessive unswitching, which is bad
-  // both in compile time and in code size.  This should be replaced once
-  // someone figures out how a good estimation.
-  return currentLoop->getBlocks().size();
-  
-  unsigned Cost = 0;
-  // FIXME: this is brain dead.  It should take into consideration code
-  // shrinkage.
+  // FIXME: This is overly conservative because it does not take into
+  // consideration code simplification opportunities.
+  CodeMetrics Metrics;
   for (Loop::block_iterator I = currentLoop->block_begin(), 
          E = currentLoop->block_end();
-       I != E; ++I) {
-    BasicBlock *BB = *I;
-    // Do not include empty blocks in the cost calculation.  This happen due to
-    // loop canonicalization and will be removed.
-    if (BB->begin() == BasicBlock::iterator(BB->getTerminator()))
-      continue;
-    
-    // Count basic blocks.
-    ++Cost;
-  }
-
-  return Cost;
+       I != E; ++I)
+    Metrics.analyzeBasicBlock(*I);
+  return Metrics.NumInsts;
 }
 
 /// UnswitchIfProfitable - We have found that we can unswitch currentLoop when
@@ -445,9 +441,9 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val){
     // FIXME: this should estimate growth by the amount of code shared by the
     // resultant unswitched loops.
     //
-    DOUT << "NOT unswitching loop %"
-         << currentLoop->getHeader()->getName() << ", cost too high: "
-         << currentLoop->getBlocks().size() << "\n";
+    DEBUG(errs() << "NOT unswitching loop %"
+          << currentLoop->getHeader()->getName() << ", cost too high: "
+          << currentLoop->getBlocks().size() << "\n");
     return false;
   }
 
@@ -506,14 +502,20 @@ void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
   // Insert a conditional branch on LIC to the two preheaders.  The original
   // code is the true version and the new code is the false version.
   Value *BranchVal = LIC;
-  if (!isa<ConstantInt>(Val) || Val->getType() != Type::Int1Ty)
-    BranchVal = new ICmpInst(ICmpInst::ICMP_EQ, LIC, Val, "tmp", InsertPt);
-  else if (Val != Context->getConstantIntTrue())
+  if (!isa<ConstantInt>(Val) ||
+      Val->getType() != Type::getInt1Ty(LIC->getContext()))
+    BranchVal = new ICmpInst(InsertPt, ICmpInst::ICMP_EQ, LIC, Val, "tmp");
+  else if (Val != ConstantInt::getTrue(Val->getContext()))
     // We want to enter the new loop when the condition is true.
     std::swap(TrueDest, FalseDest);
 
   // Insert the new branch.
-  BranchInst::Create(TrueDest, FalseDest, BranchVal, InsertPt);
+  BranchInst *BI = BranchInst::Create(TrueDest, FalseDest, BranchVal, InsertPt);
+
+  // If either edge is critical, split it. This helps preserve LoopSimplify
+  // form for enclosing loops.
+  SplitCriticalEdge(BI, 0, this);
+  SplitCriticalEdge(BI, 1, this);
 }
 
 /// UnswitchTrivialCondition - Given a loop that has a trivial unswitchable
@@ -524,10 +526,10 @@ void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
 void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, 
                                             Constant *Val, 
                                             BasicBlock *ExitBlock) {
-  DOUT << "loop-unswitch: Trivial-Unswitch loop %"
-       << loopHeader->getName() << " [" << L->getBlocks().size()
-       << " blocks] in Function " << L->getHeader()->getParent()->getName()
-       << " on cond: " << *Val << " == " << *Cond << "\n";
+  DEBUG(errs() << "loop-unswitch: Trivial-Unswitch loop %"
+        << loopHeader->getName() << " [" << L->getBlocks().size()
+        << " blocks] in Function " << L->getHeader()->getParent()->getName()
+        << " on cond: " << *Val << " == " << *Cond << "\n");
   
   // First step, split the preheader, so that we know that there is a safe place
   // to insert the conditional branch.  We will change loopPreheader to have a
@@ -570,47 +572,11 @@ void LoopUnswitch::SplitExitEdges(Loop *L,
 
   for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
     BasicBlock *ExitBlock = ExitBlocks[i];
-    std::vector<BasicBlock*> Preds(pred_begin(ExitBlock), pred_end(ExitBlock));
-
-    for (unsigned j = 0, e = Preds.size(); j != e; ++j) {
-      BasicBlock* NewExitBlock = SplitEdge(Preds[j], ExitBlock, this);
-      BasicBlock* StartBlock = Preds[j];
-      BasicBlock* EndBlock;
-      if (NewExitBlock->getSinglePredecessor() == ExitBlock) {
-        EndBlock = NewExitBlock;
-        NewExitBlock = EndBlock->getSinglePredecessor();
-      } else {
-        EndBlock = ExitBlock;
-      }
-      
-      std::set<PHINode*> InsertedPHIs;
-      PHINode* OldLCSSA = 0;
-      for (BasicBlock::iterator I = EndBlock->begin();
-           (OldLCSSA = dyn_cast<PHINode>(I)); ++I) {
-        Value* OldValue = OldLCSSA->getIncomingValueForBlock(NewExitBlock);
-        PHINode* NewLCSSA = PHINode::Create(OldLCSSA->getType(),
-                                            OldLCSSA->getName() + ".us-lcssa",
-                                            NewExitBlock->getTerminator());
-        NewLCSSA->addIncoming(OldValue, StartBlock);
-        OldLCSSA->setIncomingValue(OldLCSSA->getBasicBlockIndex(NewExitBlock),
-                                   NewLCSSA);
-        InsertedPHIs.insert(NewLCSSA);
-      }
-
-      BasicBlock::iterator InsertPt = EndBlock->getFirstNonPHI();
-      for (BasicBlock::iterator I = NewExitBlock->begin();
-         (OldLCSSA = dyn_cast<PHINode>(I)) && InsertedPHIs.count(OldLCSSA) == 0;
-         ++I) {
-        PHINode *NewLCSSA = PHINode::Create(OldLCSSA->getType(),
-                                            OldLCSSA->getName() + ".us-lcssa",
-                                            InsertPt);
-        OldLCSSA->replaceAllUsesWith(NewLCSSA);
-        NewLCSSA->addIncoming(OldLCSSA, NewExitBlock);
-      }
-
-    }    
+    SmallVector<BasicBlock *, 4> Preds(pred_begin(ExitBlock),
+                                       pred_end(ExitBlock));
+    SplitBlockPredecessors(ExitBlock, Preds.data(), Preds.size(),
+                           ".us-lcssa", this);
   }
-
 }
 
 /// UnswitchNontrivialCondition - We determined that the loop is profitable 
@@ -619,10 +585,10 @@ void LoopUnswitch::SplitExitEdges(Loop *L,
 void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, 
                                                Loop *L) {
   Function *F = loopHeader->getParent();
-  DOUT << "loop-unswitch: Unswitching loop %"
-       << loopHeader->getName() << " [" << L->getBlocks().size()
-       << " blocks] in Function " << F->getName()
-       << " when '" << *Val << "' == " << *LIC << "\n";
+  DEBUG(errs() << "loop-unswitch: Unswitching loop %"
+        << loopHeader->getName() << " [" << L->getBlocks().size()
+        << " blocks] in Function " << F->getName()
+        << " when '" << *Val << "' == " << *LIC << "\n");
 
   LoopBlocks.clear();
   NewBlocks.clear();
@@ -745,7 +711,7 @@ static void RemoveFromWorklist(Instruction *I,
 static void ReplaceUsesOfWith(Instruction *I, Value *V, 
                               std::vector<Instruction*> &Worklist,
                               Loop *L, LPPassManager *LPM) {
-  DOUT << "Replace with '" << *V << "': " << *I;
+  DEBUG(errs() << "Replace with '" << *V << "': " << *I);
 
   // Add uses to the worklist, which may be dead now.
   for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
@@ -788,7 +754,7 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,
           // dominates the latch).
           LPM->deleteSimpleAnalysisValue(Pred->getTerminator(), L);
           Pred->getTerminator()->eraseFromParent();
-          new UnreachableInst(Pred);
+          new UnreachableInst(BB->getContext(), Pred);
           
           // The loop is now broken, remove it from LI.
           RemoveLoopFromHierarchy(L);
@@ -807,7 +773,7 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,
     return;
   }
 
-  DOUT << "Nuking dead block: " << *BB;
+  DEBUG(errs() << "Nuking dead block: " << *BB);
   
   // Remove the instructions in the basic block from the worklist.
   for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
@@ -815,8 +781,10 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,
     
     // Anything that uses the instructions in this basic block should have their
     // uses replaced with undefs.
-    if (!I->use_empty())
-      I->replaceAllUsesWith(Context->getUndef(I->getType()));
+    // If I is not void type then replaceAllUsesWith undef.
+    // This allows ValueHandlers and custom metadata to adjust itself.
+    if (!I->getType()->isVoidTy())
+      I->replaceAllUsesWith(UndefValue::get(I->getType()));
   }
   
   // If this is the edge to the header block for a loop, remove the loop and
@@ -897,15 +865,18 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
   // selects, switches.
   std::vector<User*> Users(LIC->use_begin(), LIC->use_end());
   std::vector<Instruction*> Worklist;
+  LLVMContext &Context = Val->getContext();
+
 
   // If we know that LIC == Val, or that LIC == NotVal, just replace uses of LIC
   // in the loop with the appropriate one directly.
-  if (IsEqual || (isa<ConstantInt>(Val) && Val->getType() == Type::Int1Ty)) {
+  if (IsEqual || (isa<ConstantInt>(Val) &&
+      Val->getType() == Type::getInt1Ty(Val->getContext()))) {
     Value *Replacement;
     if (IsEqual)
       Replacement = Val;
     else
-      Replacement = Context->getConstantInt(Type::Int1Ty, 
+      Replacement = ConstantInt::get(Type::getInt1Ty(Val->getContext()), 
                                      !cast<ConstantInt>(Val)->getZExtValue());
     
     for (unsigned i = 0, e = Users.size(); i != e; ++i)
@@ -937,27 +908,35 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
               // FIXME: This is a hack.  We need to keep the successor around
               // and hooked up so as to preserve the loop structure, because
               // trying to update it is complicated.  So instead we preserve the
-              // loop structure and put the block on an dead code path.
-              
-              BasicBlock *SISucc = SI->getSuccessor(i);
-              BasicBlock* Old = SI->getParent();
-              BasicBlock* Split = SplitBlock(Old, SI, this);
-              
-              Instruction* OldTerm = Old->getTerminator();
-              BranchInst::Create(Split, SISucc,
-                                 Context->getConstantIntTrue(), OldTerm);
-
-              LPM->deleteSimpleAnalysisValue(Old->getTerminator(), L);
-              Old->getTerminator()->eraseFromParent();
-              
-              PHINode *PN;
-              for (BasicBlock::iterator II = SISucc->begin();
-                   (PN = dyn_cast<PHINode>(II)); ++II) {
-                Value *InVal = PN->removeIncomingValue(Split, false);
-                PN->addIncoming(InVal, Old);
-              }
-
-              SI->removeCase(i);
+              // loop structure and put the block on a dead code path.
+              BasicBlock *Switch = SI->getParent();
+              SplitEdge(Switch, SI->getSuccessor(i), this);
+              // Compute the successors instead of relying on the return value
+              // of SplitEdge, since it may have split the switch successor
+              // after PHI nodes.
+              BasicBlock *NewSISucc = SI->getSuccessor(i);
+              BasicBlock *OldSISucc = *succ_begin(NewSISucc);
+              // Create an "unreachable" destination.
+              BasicBlock *Abort = BasicBlock::Create(Context, "us-unreachable",
+                                                     Switch->getParent(),
+                                                     OldSISucc);
+              new UnreachableInst(Context, Abort);
+              // Force the new case destination to branch to the "unreachable"
+              // block while maintaining a (dead) CFG edge to the old block.
+              NewSISucc->getTerminator()->eraseFromParent();
+              BranchInst::Create(Abort, OldSISucc,
+                                 ConstantInt::getTrue(Context), NewSISucc);
+              // Release the PHI operands for this edge.
+              for (BasicBlock::iterator II = NewSISucc->begin();
+                   PHINode *PN = dyn_cast<PHINode>(II); ++II)
+                PN->setIncomingValue(PN->getBasicBlockIndex(Switch),
+                                     UndefValue::get(PN->getType()));
+              // Tell the domtree about the new block. We don't fully update the
+              // domtree here -- instead we force it to do a full recomputation
+              // after the pass is complete -- but we do need to inform it of
+              // new blocks.
+              if (DT)
+                DT->addNewBlock(Abort, NewSISucc);
               break;
             }
           }
@@ -971,7 +950,7 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
   SimplifyCode(Worklist, L);
 }
 
-/// SimplifyCode - Okay, now that we have simplified some instructions in the 
+/// SimplifyCode - Okay, now that we have simplified some instructions in the
 /// loop, walk over it and constant prop, dce, and fold control flow where
 /// possible.  Note that this is effectively a very simple loop-structure-aware
 /// optimizer.  During processing of this loop, L could very well be deleted, so
@@ -986,14 +965,14 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
     Worklist.pop_back();
     
     // Simple constant folding.
-    if (Constant *C = ConstantFoldInstruction(I)) {
+    if (Constant *C = ConstantFoldInstruction(I, I->getContext())) {
       ReplaceUsesOfWith(I, C, Worklist, L, LPM);
       continue;
     }
     
     // Simple DCE.
     if (isInstructionTriviallyDead(I)) {
-      DOUT << "Remove dead instruction '" << *I;
+      DEBUG(errs() << "Remove dead instruction '" << *I);
       
       // Add uses to the worklist, which may be dead now.
       for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
@@ -1017,10 +996,11 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
       break;
     case Instruction::And:
       if (isa<ConstantInt>(I->getOperand(0)) && 
-          I->getOperand(0)->getType() == Type::Int1Ty)   // constant -> RHS
+          // constant -> RHS
+          I->getOperand(0)->getType() == Type::getInt1Ty(I->getContext()))   
         cast<BinaryOperator>(I)->swapOperands();
       if (ConstantInt *CB = dyn_cast<ConstantInt>(I->getOperand(1))) 
-        if (CB->getType() == Type::Int1Ty) {
+        if (CB->getType() == Type::getInt1Ty(I->getContext())) {
           if (CB->isOne())      // X & 1 -> X
             ReplaceUsesOfWith(I, I->getOperand(0), Worklist, L, LPM);
           else                  // X & 0 -> 0
@@ -1030,10 +1010,11 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
       break;
     case Instruction::Or:
       if (isa<ConstantInt>(I->getOperand(0)) &&
-          I->getOperand(0)->getType() == Type::Int1Ty)   // constant -> RHS
+          // constant -> RHS
+          I->getOperand(0)->getType() == Type::getInt1Ty(I->getContext()))
         cast<BinaryOperator>(I)->swapOperands();
       if (ConstantInt *CB = dyn_cast<ConstantInt>(I->getOperand(1)))
-        if (CB->getType() == Type::Int1Ty) {
+        if (CB->getType() == Type::getInt1Ty(I->getContext())) {
           if (CB->isOne())   // X | 1 -> 1
             ReplaceUsesOfWith(I, I->getOperand(1), Worklist, L, LPM);
           else                  // X | 0 -> X
@@ -1052,8 +1033,8 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
         if (!SinglePred) continue;  // Nothing to do.
         assert(SinglePred == Pred && "CFG broken");
 
-        DOUT << "Merging blocks: " << Pred->getName() << " <- " 
-             << Succ->getName() << "\n";
+        DEBUG(errs() << "Merging blocks: " << Pred->getName() << " <- " 
+              << Succ->getName() << "\n");
         
         // Resolve any single entry PHI nodes in Succ.
         while (PHINode *PN = dyn_cast<PHINode>(Succ->begin()))
@@ -1080,7 +1061,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
         // remove dead blocks.
         break;  // FIXME: Enable.
 
-        DOUT << "Folded branch: " << *BI;
+        DEBUG(errs() << "Folded branch: " << *BI);
         BasicBlock *DeadSucc = BI->getSuccessor(CB->getZExtValue());
         BasicBlock *LiveSucc = BI->getSuccessor(!CB->getZExtValue());
         DeadSucc->removePredecessor(BI->getParent(), true);
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 3c7a5ab..c922814 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -24,29 +24,33 @@
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetData.h"
 #include <list>
 using namespace llvm;
 
 STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
 STATISTIC(NumMemSetInfer, "Number of memsets inferred");
+STATISTIC(NumMoveToCpy,   "Number of memmoves converted to memcpy");
 
 /// isBytewiseValue - If the specified value can be set by repeating the same
 /// byte in memory, return the i8 value that it is represented with.  This is
 /// true for all i8 values obviously, but is also true for i32 0, i32 -1,
 /// i16 0xF0F0, double 0.0 etc.  If the value can't be handled with a repeated
 /// byte store (e.g. i16 0x1234), return null.
-static Value *isBytewiseValue(Value *V, LLVMContext* Context) {
+static Value *isBytewiseValue(Value *V) {
+  LLVMContext &Context = V->getContext();
+  
   // All byte-wide stores are splatable, even of arbitrary variables.
-  if (V->getType() == Type::Int8Ty) return V;
+  if (V->getType() == Type::getInt8Ty(Context)) return V;
   
   // Constant float and double values can be handled as integer values if the
   // corresponding integer value is "byteable".  An important case is 0.0. 
   if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
-    if (CFP->getType() == Type::FloatTy)
-      V = Context->getConstantExprBitCast(CFP, Type::Int32Ty);
-    if (CFP->getType() == Type::DoubleTy)
-      V = Context->getConstantExprBitCast(CFP, Type::Int64Ty);
+    if (CFP->getType()->isFloatTy())
+      V = ConstantExpr::getBitCast(CFP, Type::getInt32Ty(Context));
+    if (CFP->getType()->isDoubleTy())
+      V = ConstantExpr::getBitCast(CFP, Type::getInt64Ty(Context));
     // Don't handle long double formats, which have strange constraints.
   }
   
@@ -69,7 +73,7 @@ static Value *isBytewiseValue(Value *V, LLVMContext* Context) {
         if (Val != Val2)
           return 0;
       }
-      return Context->getConstantInt(Val);
+      return ConstantInt::get(Context, Val);
     }
   }
   
@@ -271,6 +275,7 @@ void MemsetRanges::addStore(int64_t Start, StoreInst *SI) {
   if (Start < I->Start) {
     I->Start = Start;
     I->StartPtr = SI->getPointerOperand();
+    I->Alignment = SI->getAlignment();
   }
     
   // Now we know that Start <= I->End and Start >= I->Start (so the startpoint
@@ -295,8 +300,7 @@ void MemsetRanges::addStore(int64_t Start, StoreInst *SI) {
 //===----------------------------------------------------------------------===//
 
 namespace {
-
-  class VISIBILITY_HIDDEN MemCpyOpt : public FunctionPass {
+  class MemCpyOpt : public FunctionPass {
     bool runOnFunction(Function &F);
   public:
     static char ID; // Pass identification, replacement for typeid
@@ -309,16 +313,15 @@ namespace {
       AU.addRequired<DominatorTree>();
       AU.addRequired<MemoryDependenceAnalysis>();
       AU.addRequired<AliasAnalysis>();
-      AU.addRequired<TargetData>();
       AU.addPreserved<AliasAnalysis>();
       AU.addPreserved<MemoryDependenceAnalysis>();
-      AU.addPreserved<TargetData>();
     }
   
     // Helper fuctions
-    bool processStore(StoreInst *SI, BasicBlock::iterator& BBI);
-    bool processMemCpy(MemCpyInst* M);
-    bool performCallSlotOptzn(MemCpyInst* cpy, CallInst* C);
+    bool processStore(StoreInst *SI, BasicBlock::iterator &BBI);
+    bool processMemCpy(MemCpyInst *M);
+    bool processMemMove(MemMoveInst *M);
+    bool performCallSlotOptzn(MemCpyInst *cpy, CallInst *C);
     bool iterateOnFunction(Function &F);
   };
   
@@ -337,27 +340,31 @@ static RegisterPass<MemCpyOpt> X("memcpyopt",
 /// some other patterns to fold away.  In particular, this looks for stores to
 /// neighboring locations of memory.  If it sees enough consequtive ones
 /// (currently 4) it attempts to merge them together into a memcpy/memset.
-bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator& BBI) {
+bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
   if (SI->isVolatile()) return false;
   
+  LLVMContext &Context = SI->getContext();
+
   // There are two cases that are interesting for this code to handle: memcpy
   // and memset.  Right now we only handle memset.
   
   // Ensure that the value being stored is something that can be memset'able a
   // byte at a time like "0" or "-1" or any width, as well as things like
   // 0xA0A0A0A0 and 0.0.
-  Value *ByteVal = isBytewiseValue(SI->getOperand(0), Context);
+  Value *ByteVal = isBytewiseValue(SI->getOperand(0));
   if (!ByteVal)
     return false;
 
-  TargetData &TD = getAnalysis<TargetData>();
+  TargetData *TD = getAnalysisIfAvailable<TargetData>();
+  if (!TD) return false;
   AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+  Module *M = SI->getParent()->getParent()->getParent();
 
   // Okay, so we now have a single store that can be splatable.  Scan to find
   // all subsequent stores of the same value to offset from the same pointer.
   // Join these together into ranges, so we can decide whether contiguous blocks
   // are stored.
-  MemsetRanges Ranges(TD);
+  MemsetRanges Ranges(*TD);
   
   Value *StartPtr = SI->getPointerOperand();
   
@@ -385,12 +392,12 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator& BBI) {
     if (NextStore->isVolatile()) break;
     
     // Check to see if this stored value is of the same byte-splattable value.
-    if (ByteVal != isBytewiseValue(NextStore->getOperand(0), Context))
+    if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
       break;
 
     // Check to see if this store is to a constant offset from the start ptr.
     int64_t Offset;
-    if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, TD))
+    if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, *TD))
       break;
 
     Ranges.addStore(Offset, NextStore);
@@ -405,7 +412,6 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator& BBI) {
   // store as well.  We try to avoid this unless there is at least something
   // interesting as a small compile-time optimization.
   Ranges.addStore(0, SI);
-
   
   Function *MemSetF = 0;
   
@@ -419,7 +425,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator& BBI) {
     if (Range.TheStores.size() == 1) continue;
     
     // If it is profitable to lower this range to memset, do so now.
-    if (!Range.isProfitableToUseMemset(TD))
+    if (!Range.isProfitableToUseMemset(*TD))
       continue;
     
     // Otherwise, we do want to transform this!  Create a new memset.  We put
@@ -429,37 +435,38 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator& BBI) {
     BasicBlock::iterator InsertPt = BI;
   
     if (MemSetF == 0) {
-      const Type *Tys[] = {Type::Int64Ty};
-      MemSetF = Intrinsic::getDeclaration(SI->getParent()->getParent()
-                                          ->getParent(), Intrinsic::memset,
-                                          Tys, 1);
-   }
+      const Type *Ty = Type::getInt64Ty(Context);
+      MemSetF = Intrinsic::getDeclaration(M, Intrinsic::memset, &Ty, 1);
+    }
     
     // Get the starting pointer of the block.
     StartPtr = Range.StartPtr;
   
     // Cast the start ptr to be i8* as memset requires.
-    const Type *i8Ptr = Context->getPointerTypeUnqual(Type::Int8Ty);
+    const Type *i8Ptr = Type::getInt8PtrTy(Context);
     if (StartPtr->getType() != i8Ptr)
-      StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getNameStart(),
+      StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getName(),
                                  InsertPt);
   
     Value *Ops[] = {
       StartPtr, ByteVal,   // Start, value
-      Context->getConstantInt(Type::Int64Ty, Range.End-Range.Start),  // size
-      Context->getConstantInt(Type::Int32Ty, Range.Alignment)   // align
+      // size
+      ConstantInt::get(Type::getInt64Ty(Context), Range.End-Range.Start),
+      // align
+      ConstantInt::get(Type::getInt32Ty(Context), Range.Alignment)
     };
     Value *C = CallInst::Create(MemSetF, Ops, Ops+4, "", InsertPt);
-    DEBUG(cerr << "Replace stores:\n";
+    DEBUG(errs() << "Replace stores:\n";
           for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
-            cerr << *Range.TheStores[i];
-          cerr << "With: " << *C); C=C;
+            errs() << *Range.TheStores[i];
+          errs() << "With: " << *C); C=C;
   
     // Don't invalidate the iterator
     BBI = BI;
   
     // Zap all the stores.
-    for (SmallVector<StoreInst*, 16>::const_iterator SI = Range.TheStores.begin(),
+    for (SmallVector<StoreInst*, 16>::const_iterator
+         SI = Range.TheStores.begin(),
          SE = Range.TheStores.end(); SI != SE; ++SI)
       (*SI)->eraseFromParent();
     ++NumMemSetInfer;
@@ -490,29 +497,30 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
 
   // Deliberately get the source and destination with bitcasts stripped away,
   // because we'll need to do type comparisons based on the underlying type.
-  Value* cpyDest = cpy->getDest();
-  Value* cpySrc = cpy->getSource();
+  Value *cpyDest = cpy->getDest();
+  Value *cpySrc = cpy->getSource();
   CallSite CS = CallSite::get(C);
 
   // We need to be able to reason about the size of the memcpy, so we require
   // that it be a constant.
-  ConstantInt* cpyLength = dyn_cast<ConstantInt>(cpy->getLength());
+  ConstantInt *cpyLength = dyn_cast<ConstantInt>(cpy->getLength());
   if (!cpyLength)
     return false;
 
   // Require that src be an alloca.  This simplifies the reasoning considerably.
-  AllocaInst* srcAlloca = dyn_cast<AllocaInst>(cpySrc);
+  AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
   if (!srcAlloca)
     return false;
 
   // Check that all of src is copied to dest.
-  TargetData& TD = getAnalysis<TargetData>();
+  TargetData *TD = getAnalysisIfAvailable<TargetData>();
+  if (!TD) return false;
 
-  ConstantInt* srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
+  ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
   if (!srcArraySize)
     return false;
 
-  uint64_t srcSize = TD.getTypeAllocSize(srcAlloca->getAllocatedType()) *
+  uint64_t srcSize = TD->getTypeAllocSize(srcAlloca->getAllocatedType()) *
     srcArraySize->getZExtValue();
 
   if (cpyLength->getZExtValue() < srcSize)
@@ -521,25 +529,25 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
   // Check that accessing the first srcSize bytes of dest will not cause a
   // trap.  Otherwise the transform is invalid since it might cause a trap
   // to occur earlier than it otherwise would.
-  if (AllocaInst* A = dyn_cast<AllocaInst>(cpyDest)) {
+  if (AllocaInst *A = dyn_cast<AllocaInst>(cpyDest)) {
     // The destination is an alloca.  Check it is larger than srcSize.
-    ConstantInt* destArraySize = dyn_cast<ConstantInt>(A->getArraySize());
+    ConstantInt *destArraySize = dyn_cast<ConstantInt>(A->getArraySize());
     if (!destArraySize)
       return false;
 
-    uint64_t destSize = TD.getTypeAllocSize(A->getAllocatedType()) *
+    uint64_t destSize = TD->getTypeAllocSize(A->getAllocatedType()) *
       destArraySize->getZExtValue();
 
     if (destSize < srcSize)
       return false;
-  } else if (Argument* A = dyn_cast<Argument>(cpyDest)) {
+  } else if (Argument *A = dyn_cast<Argument>(cpyDest)) {
     // If the destination is an sret parameter then only accesses that are
     // outside of the returned struct type can trap.
     if (!A->hasStructRetAttr())
       return false;
 
-    const Type* StructTy = cast<PointerType>(A->getType())->getElementType();
-    uint64_t destSize = TD.getTypeAllocSize(StructTy);
+    const Type *StructTy = cast<PointerType>(A->getType())->getElementType();
+    uint64_t destSize = TD->getTypeAllocSize(StructTy);
 
     if (destSize < srcSize)
       return false;
@@ -554,14 +562,14 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
   SmallVector<User*, 8> srcUseList(srcAlloca->use_begin(),
                                    srcAlloca->use_end());
   while (!srcUseList.empty()) {
-    User* UI = srcUseList.back();
+    User *UI = srcUseList.back();
     srcUseList.pop_back();
 
     if (isa<BitCastInst>(UI)) {
       for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
            I != E; ++I)
         srcUseList.push_back(*I);
-    } else if (GetElementPtrInst* G = dyn_cast<GetElementPtrInst>(UI)) {
+    } else if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(UI)) {
       if (G->hasAllZeroIndices())
         for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
              I != E; ++I)
@@ -575,8 +583,8 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
 
   // Since we're changing the parameter to the callsite, we need to make sure
   // that what would be the new parameter dominates the callsite.
-  DominatorTree& DT = getAnalysis<DominatorTree>();
-  if (Instruction* cpyDestInst = dyn_cast<Instruction>(cpyDest))
+  DominatorTree &DT = getAnalysis<DominatorTree>();
+  if (Instruction *cpyDestInst = dyn_cast<Instruction>(cpyDest))
     if (!DT.dominates(cpyDestInst, C))
       return false;
 
@@ -584,7 +592,7 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
   // unexpected manner, for example via a global, which we deduce from
   // the use analysis, we also need to know that it does not sneakily
   // access dest.  We rely on AA to figure this out for us.
-  AliasAnalysis& AA = getAnalysis<AliasAnalysis>();
+  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
   if (AA.getModRefInfo(C, cpy->getRawDest(), srcSize) !=
       AliasAnalysis::NoModRef)
     return false;
@@ -597,11 +605,11 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
         cpyDest = CastInst::CreatePointerCast(cpyDest, cpySrc->getType(),
                                               cpyDest->getName(), C);
       changedArgument = true;
-      if (CS.getArgument(i)->getType() != cpyDest->getType())
-        CS.setArgument(i, CastInst::CreatePointerCast(cpyDest, 
-                       CS.getArgument(i)->getType(), cpyDest->getName(), C));
-      else
+      if (CS.getArgument(i)->getType() == cpyDest->getType())
         CS.setArgument(i, cpyDest);
+      else
+        CS.setArgument(i, CastInst::CreatePointerCast(cpyDest, 
+                          CS.getArgument(i)->getType(), cpyDest->getName(), C));
     }
 
   if (!changedArgument)
@@ -609,7 +617,7 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
 
   // Drop any cached information about the call, because we may have changed
   // its dependence information by changing its parameter.
-  MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
+  MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
   MD.removeInstruction(C);
 
   // Remove the memcpy
@@ -624,22 +632,22 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
 /// copies X to Y, and memcpy B which copies Y to Z, then we can rewrite B to be
 /// a memcpy from X to Z (or potentially a memmove, depending on circumstances).
 ///  This allows later passes to remove the first memcpy altogether.
-bool MemCpyOpt::processMemCpy(MemCpyInst* M) {
-  MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
+bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
+  MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
 
   // The are two possible optimizations we can do for memcpy:
-  //   a) memcpy-memcpy xform which exposes redundance for DSE
-  //   b) call-memcpy xform for return slot optimization
+  //   a) memcpy-memcpy xform which exposes redundance for DSE.
+  //   b) call-memcpy xform for return slot optimization.
   MemDepResult dep = MD.getDependency(M);
   if (!dep.isClobber())
     return false;
   if (!isa<MemCpyInst>(dep.getInst())) {
-    if (CallInst* C = dyn_cast<CallInst>(dep.getInst()))
+    if (CallInst *C = dyn_cast<CallInst>(dep.getInst()))
       return performCallSlotOptzn(M, C);
     return false;
   }
   
-  MemCpyInst* MDep = cast<MemCpyInst>(dep.getInst());
+  MemCpyInst *MDep = cast<MemCpyInst>(dep.getInst());
   
   // We can only transforms memcpy's where the dest of one is the source of the
   // other
@@ -648,8 +656,8 @@ bool MemCpyOpt::processMemCpy(MemCpyInst* M) {
   
   // Second, the length of the memcpy's must be the same, or the preceeding one
   // must be larger than the following one.
-  ConstantInt* C1 = dyn_cast<ConstantInt>(MDep->getLength());
-  ConstantInt* C2 = dyn_cast<ConstantInt>(M->getLength());
+  ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
+  ConstantInt *C2 = dyn_cast<ConstantInt>(M->getLength());
   if (!C1 || !C2)
     return false;
   
@@ -661,7 +669,7 @@ bool MemCpyOpt::processMemCpy(MemCpyInst* M) {
   
   // Finally, we have to make sure that the dest of the second does not
   // alias the source of the first
-  AliasAnalysis& AA = getAnalysis<AliasAnalysis>();
+  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
   if (AA.alias(M->getRawDest(), CpySize, MDep->getRawSource(), DepSize) !=
       AliasAnalysis::NoAlias)
     return false;
@@ -673,17 +681,16 @@ bool MemCpyOpt::processMemCpy(MemCpyInst* M) {
     return false;
   
   // If all checks passed, then we can transform these memcpy's
-  const Type *Tys[1];
-  Tys[0] = M->getLength()->getType();
-  Function* MemCpyFun = Intrinsic::getDeclaration(
+  const Type *Ty = M->getLength()->getType();
+  Function *MemCpyFun = Intrinsic::getDeclaration(
                                  M->getParent()->getParent()->getParent(),
-                                 M->getIntrinsicID(), Tys, 1);
+                                 M->getIntrinsicID(), &Ty, 1);
     
   Value *Args[4] = {
     M->getRawDest(), MDep->getRawSource(), M->getLength(), M->getAlignmentCst()
   };
   
-  CallInst* C = CallInst::Create(MemCpyFun, Args, Args+4, "", M);
+  CallInst *C = CallInst::Create(MemCpyFun, Args, Args+4, "", M);
   
   
   // If C and M don't interfere, then this is a valid transformation.  If they
@@ -702,41 +709,78 @@ bool MemCpyOpt::processMemCpy(MemCpyInst* M) {
   return false;
 }
 
-// MemCpyOpt::runOnFunction - This is the main transformation entry point for a
-// function.
-//
-bool MemCpyOpt::runOnFunction(Function& F) {
+/// processMemMove - Transforms memmove calls to memcpy calls when the src/dst
+/// are guaranteed not to alias.
+bool MemCpyOpt::processMemMove(MemMoveInst *M) {
+  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+
+  // If the memmove is a constant size, use it for the alias query, this allows
+  // us to optimize things like: memmove(P, P+64, 64);
+  uint64_t MemMoveSize = ~0ULL;
+  if (ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength()))
+    MemMoveSize = Len->getZExtValue();
   
-  bool changed = false;
-  bool shouldContinue = true;
+  // See if the pointers alias.
+  if (AA.alias(M->getRawDest(), MemMoveSize, M->getRawSource(), MemMoveSize) !=
+      AliasAnalysis::NoAlias)
+    return false;
   
-  while (shouldContinue) {
-    shouldContinue = iterateOnFunction(F);
-    changed |= shouldContinue;
-  }
+  DEBUG(errs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n");
   
-  return changed;
-}
+  // If not, then we know we can transform this.
+  Module *Mod = M->getParent()->getParent()->getParent();
+  const Type *Ty = M->getLength()->getType();
+  M->setOperand(0, Intrinsic::getDeclaration(Mod, Intrinsic::memcpy, &Ty, 1));
 
+  // MemDep may have over conservative information about this instruction, just
+  // conservatively flush it from the cache.
+  getAnalysis<MemoryDependenceAnalysis>().removeInstruction(M);
 
-// MemCpyOpt::iterateOnFunction - Executes one iteration of GVN
+  ++NumMoveToCpy;
+  return true;
+}
+  
+
+// MemCpyOpt::iterateOnFunction - Executes one iteration of GVN.
 bool MemCpyOpt::iterateOnFunction(Function &F) {
-  bool changed_function = false;
+  bool MadeChange = false;
 
-  // Walk all instruction in the function
+  // Walk all instruction in the function.
   for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {
     for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
          BI != BE;) {
-      // Avoid invalidating the iterator
-      Instruction* I = BI++;
+      // Avoid invalidating the iterator.
+      Instruction *I = BI++;
       
       if (StoreInst *SI = dyn_cast<StoreInst>(I))
-        changed_function |= processStore(SI, BI);
-      else if (MemCpyInst* M = dyn_cast<MemCpyInst>(I)) {
-        changed_function |= processMemCpy(M);
+        MadeChange |= processStore(SI, BI);
+      else if (MemCpyInst *M = dyn_cast<MemCpyInst>(I))
+        MadeChange |= processMemCpy(M);
+      else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I)) {
+        if (processMemMove(M)) {
+          --BI;         // Reprocess the new memcpy.
+          MadeChange = true;
+        }
       }
     }
   }
   
-  return changed_function;
+  return MadeChange;
+}
+
+// MemCpyOpt::runOnFunction - This is the main transformation entry point for a
+// function.
+//
+bool MemCpyOpt::runOnFunction(Function &F) {
+  bool MadeChange = false;
+  while (1) {
+    if (!iterateOnFunction(F))
+      break;
+    MadeChange = true;
+  }
+  
+  return MadeChange;
 }
+
+
+
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index fa60a9d..e6ffac2 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -31,9 +31,9 @@
 #include "llvm/Pass.h"
 #include "llvm/Assembly/Writer.h"
 #include "llvm/Support/CFG.h"
-#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/Statistic.h"
 #include <algorithm>
@@ -46,7 +46,7 @@ STATISTIC(NumAnnihil, "Number of expr tree annihilated");
 STATISTIC(NumFactor , "Number of multiplies factored");
 
 namespace {
-  struct VISIBILITY_HIDDEN ValueEntry {
+  struct ValueEntry {
     unsigned Rank;
     Value *Op;
     ValueEntry(unsigned R, Value *O) : Rank(R), Op(O) {}
@@ -61,17 +61,17 @@ namespace {
 ///
 static void PrintOps(Instruction *I, const std::vector<ValueEntry> &Ops) {
   Module *M = I->getParent()->getParent()->getParent();
-  cerr << Instruction::getOpcodeName(I->getOpcode()) << " "
+  errs() << Instruction::getOpcodeName(I->getOpcode()) << " "
        << *Ops[0].Op->getType();
   for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
-    WriteAsOperand(*cerr.stream() << " ", Ops[i].Op, false, M);
-    cerr << "," << Ops[i].Rank;
+    WriteAsOperand(errs() << " ", Ops[i].Op, false, M);
+    errs() << "," << Ops[i].Rank;
   }
 }
 #endif
   
 namespace {
-  class VISIBILITY_HIDDEN Reassociate : public FunctionPass {
+  class Reassociate : public FunctionPass {
     std::map<BasicBlock*, unsigned> RankMap;
     std::map<AssertingVH<>, unsigned> ValueRankMap;
     bool MadeChange;
@@ -181,8 +181,8 @@ unsigned Reassociate::getRank(Value *V) {
       (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I)))
     ++Rank;
 
-  //DOUT << "Calculated Rank[" << V->getName() << "] = "
-  //     << Rank << "\n";
+  //DEBUG(errs() << "Calculated Rank[" << V->getName() << "] = "
+  //     << Rank << "\n");
 
   return CachedRank = Rank;
 }
@@ -200,8 +200,8 @@ static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
 ///
 static Instruction *LowerNegateToMultiply(Instruction *Neg,
                               std::map<AssertingVH<>, unsigned> &ValueRankMap,
-                              LLVMContext* Context) {
-  Constant *Cst = Context->getConstantIntAllOnesValue(Neg->getType());
+                              LLVMContext &Context) {
+  Constant *Cst = Constant::getAllOnesValue(Neg->getType());
 
   Instruction *Res = BinaryOperator::CreateMul(Neg->getOperand(1), Cst, "",Neg);
   ValueRankMap.erase(Neg);
@@ -222,7 +222,7 @@ void Reassociate::LinearizeExpr(BinaryOperator *I) {
          isReassociableOp(RHS, I->getOpcode()) &&
          "Not an expression that needs linearization?");
 
-  DOUT << "Linear" << *LHS << *RHS << *I;
+  DEBUG(errs() << "Linear" << *LHS << '\n' << *RHS << '\n' << *I << '\n');
 
   // Move the RHS instruction to live immediately before I, avoiding breaking
   // dominator properties.
@@ -235,7 +235,7 @@ void Reassociate::LinearizeExpr(BinaryOperator *I) {
 
   ++NumLinear;
   MadeChange = true;
-  DOUT << "Linearized: " << *I;
+  DEBUG(errs() << "Linearized: " << *I << '\n');
 
   // If D is part of this expression tree, tail recurse.
   if (isReassociableOp(I->getOperand(1), I->getOpcode()))
@@ -256,6 +256,7 @@ void Reassociate::LinearizeExprTree(BinaryOperator *I,
                                     std::vector<ValueEntry> &Ops) {
   Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
   unsigned Opcode = I->getOpcode();
+  LLVMContext &Context = I->getContext();
 
   // First step, linearize the expression if it is in ((A+B)+(C+D)) form.
   BinaryOperator *LHSBO = isReassociableOp(LHS, Opcode);
@@ -284,8 +285,8 @@ void Reassociate::LinearizeExprTree(BinaryOperator *I,
       Ops.push_back(ValueEntry(getRank(RHS), RHS));
       
       // Clear the leaves out.
-      I->setOperand(0, Context->getUndef(I->getType()));
-      I->setOperand(1, Context->getUndef(I->getType()));
+      I->setOperand(0, UndefValue::get(I->getType()));
+      I->setOperand(1, UndefValue::get(I->getType()));
       return;
     } else {
       // Turn X+(Y+Z) -> (Y+Z)+X
@@ -320,7 +321,7 @@ void Reassociate::LinearizeExprTree(BinaryOperator *I,
   Ops.push_back(ValueEntry(getRank(RHS), RHS));
   
   // Clear the RHS leaf out.
-  I->setOperand(1, Context->getUndef(I->getType()));
+  I->setOperand(1, UndefValue::get(I->getType()));
 }
 
 // RewriteExprTree - Now that the operands for this expression tree are
@@ -333,10 +334,10 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
     if (I->getOperand(0) != Ops[i].Op ||
         I->getOperand(1) != Ops[i+1].Op) {
       Value *OldLHS = I->getOperand(0);
-      DOUT << "RA: " << *I;
+      DEBUG(errs() << "RA: " << *I << '\n');
       I->setOperand(0, Ops[i].Op);
       I->setOperand(1, Ops[i+1].Op);
-      DOUT << "TO: " << *I;
+      DEBUG(errs() << "TO: " << *I << '\n');
       MadeChange = true;
       ++NumChanged;
       
@@ -349,9 +350,9 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
   assert(i+2 < Ops.size() && "Ops index out of range!");
 
   if (I->getOperand(1) != Ops[i].Op) {
-    DOUT << "RA: " << *I;
+    DEBUG(errs() << "RA: " << *I << '\n');
     I->setOperand(1, Ops[i].Op);
-    DOUT << "TO: " << *I;
+    DEBUG(errs() << "TO: " << *I << '\n');
     MadeChange = true;
     ++NumChanged;
   }
@@ -373,7 +374,7 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
 // version of the value is returned, and BI is left pointing at the instruction
 // that should be processed next by the reassociation pass.
 //
-static Value *NegateValue(Value *V, Instruction *BI) {
+static Value *NegateValue(LLVMContext &Context, Value *V, Instruction *BI) {
   // We are trying to expose opportunity for reassociation.  One of the things
   // that we want to do to achieve this is to push a negation as deep into an
   // expression chain as possible, to expose the add instructions.  In practice,
@@ -386,8 +387,8 @@ static Value *NegateValue(Value *V, Instruction *BI) {
   if (Instruction *I = dyn_cast<Instruction>(V))
     if (I->getOpcode() == Instruction::Add && I->hasOneUse()) {
       // Push the negates through the add.
-      I->setOperand(0, NegateValue(I->getOperand(0), BI));
-      I->setOperand(1, NegateValue(I->getOperand(1), BI));
+      I->setOperand(0, NegateValue(Context, I->getOperand(0), BI));
+      I->setOperand(1, NegateValue(Context, I->getOperand(1), BI));
 
       // We must move the add instruction here, because the neg instructions do
       // not dominate the old add instruction in general.  By moving it, we are
@@ -407,7 +408,7 @@ static Value *NegateValue(Value *V, Instruction *BI) {
 
 /// ShouldBreakUpSubtract - Return true if we should break up this subtract of
 /// X-Y into (X + -Y).
-static bool ShouldBreakUpSubtract(Instruction *Sub) {
+static bool ShouldBreakUpSubtract(LLVMContext &Context, Instruction *Sub) {
   // If this is a negation, we can't split it up!
   if (BinaryOperator::isNeg(Sub))
     return false;
@@ -431,7 +432,7 @@ static bool ShouldBreakUpSubtract(Instruction *Sub) {
 /// BreakUpSubtract - If we have (X-Y), and if either X is an add, or if this is
 /// only used by an add, transform this into (X+(0-Y)) to promote better
 /// reassociation.
-static Instruction *BreakUpSubtract(Instruction *Sub,
+static Instruction *BreakUpSubtract(LLVMContext &Context, Instruction *Sub,
                               std::map<AssertingVH<>, unsigned> &ValueRankMap) {
   // Convert a subtract into an add and a neg instruction... so that sub
   // instructions can be commuted with other add instructions...
@@ -439,7 +440,7 @@ static Instruction *BreakUpSubtract(Instruction *Sub,
   // Calculate the negative value of Operand 1 of the sub instruction...
   // and set it as the RHS of the add instruction we just made...
   //
-  Value *NegVal = NegateValue(Sub->getOperand(1), Sub);
+  Value *NegVal = NegateValue(Context, Sub->getOperand(1), Sub);
   Instruction *New =
     BinaryOperator::CreateAdd(Sub->getOperand(0), NegVal, "", Sub);
   New->takeName(Sub);
@@ -449,7 +450,7 @@ static Instruction *BreakUpSubtract(Instruction *Sub,
   Sub->replaceAllUsesWith(New);
   Sub->eraseFromParent();
 
-  DOUT << "Negated: " << *New;
+  DEBUG(errs() << "Negated: " << *New << '\n');
   return New;
 }
 
@@ -458,16 +459,16 @@ static Instruction *BreakUpSubtract(Instruction *Sub,
 /// reassociation.
 static Instruction *ConvertShiftToMul(Instruction *Shl, 
                               std::map<AssertingVH<>, unsigned> &ValueRankMap,
-                              LLVMContext* Context) {
+                              LLVMContext &Context) {
   // If an operand of this shift is a reassociable multiply, or if the shift
   // is used by a reassociable multiply or add, turn into a multiply.
   if (isReassociableOp(Shl->getOperand(0), Instruction::Mul) ||
       (Shl->hasOneUse() && 
        (isReassociableOp(Shl->use_back(), Instruction::Mul) ||
         isReassociableOp(Shl->use_back(), Instruction::Add)))) {
-    Constant *MulCst = Context->getConstantInt(Shl->getType(), 1);
+    Constant *MulCst = ConstantInt::get(Shl->getType(), 1);
     MulCst =
-        Context->getConstantExprShl(MulCst, cast<Constant>(Shl->getOperand(1)));
+        ConstantExpr::getShl(MulCst, cast<Constant>(Shl->getOperand(1)));
     
     Instruction *Mul = BinaryOperator::CreateMul(Shl->getOperand(0), MulCst,
                                                  "", Shl);
@@ -567,7 +568,7 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,
   if (Constant *V1 = dyn_cast<Constant>(Ops[Ops.size()-2].Op))
     if (Constant *V2 = dyn_cast<Constant>(Ops.back().Op)) {
       Ops.pop_back();
-      Ops.back().Op = Context->getConstantExpr(Opcode, V1, V2);
+      Ops.back().Op = ConstantExpr::get(Opcode, V1, V2);
       return OptimizeExpression(I, Ops);
     }
 
@@ -623,10 +624,10 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,
         if (FoundX != i) {
           if (Opcode == Instruction::And) {   // ...&X&~X = 0
             ++NumAnnihil;
-            return Context->getNullValue(X->getType());
+            return Constant::getNullValue(X->getType());
           } else if (Opcode == Instruction::Or) {   // ...|X|~X = -1
             ++NumAnnihil;
-            return Context->getConstantIntAllOnesValue(X->getType());
+            return Constant::getAllOnesValue(X->getType());
           }
         }
       }
@@ -645,7 +646,7 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,
           assert(Opcode == Instruction::Xor);
           if (e == 2) {
             ++NumAnnihil;
-            return Context->getNullValue(Ops[0].Op->getType());
+            return Constant::getNullValue(Ops[0].Op->getType());
           }
           // ... X^X -> ...
           Ops.erase(Ops.begin()+i, Ops.begin()+i+2);
@@ -670,7 +671,7 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,
           // Remove X and -X from the operand list.
           if (Ops.size() == 2) {
             ++NumAnnihil;
-            return Context->getNullValue(X->getType());
+            return Constant::getNullValue(X->getType());
           } else {
             Ops.erase(Ops.begin()+i);
             if (i < FoundX)
@@ -727,7 +728,7 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,
 
     // If any factor occurred more than one time, we can pull it out.
     if (MaxOcc > 1) {
-      DOUT << "\nFACTORING [" << MaxOcc << "]: " << *MaxOccVal << "\n";
+      DEBUG(errs() << "\nFACTORING [" << MaxOcc << "]: " << *MaxOccVal << "\n");
       
       // Create a new instruction that uses the MaxOccVal twice.  If we don't do
       // this, we could otherwise run into situations where removing a factor
@@ -781,6 +782,8 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,
 /// ReassociateBB - Inspect all of the instructions in this basic block,
 /// reassociating them as we go.
 void Reassociate::ReassociateBB(BasicBlock *BB) {
+  LLVMContext &Context = BB->getContext();
+  
   for (BasicBlock::iterator BBI = BB->begin(); BBI != BB->end(); ) {
     Instruction *BI = BBI++;
     if (BI->getOpcode() == Instruction::Shl &&
@@ -798,8 +801,8 @@ void Reassociate::ReassociateBB(BasicBlock *BB) {
     // If this is a subtract instruction which is not already in negate form,
     // see if we can convert it to X+-Y.
     if (BI->getOpcode() == Instruction::Sub) {
-      if (ShouldBreakUpSubtract(BI)) {
-        BI = BreakUpSubtract(BI, ValueRankMap);
+      if (ShouldBreakUpSubtract(Context, BI)) {
+        BI = BreakUpSubtract(Context, BI, ValueRankMap);
         MadeChange = true;
       } else if (BinaryOperator::isNeg(BI)) {
         // Otherwise, this is a negation.  See if the operand is a multiply tree
@@ -838,7 +841,7 @@ void Reassociate::ReassociateExpression(BinaryOperator *I) {
   std::vector<ValueEntry> Ops;
   LinearizeExprTree(I, Ops);
   
-  DOUT << "RAIn:\t"; DEBUG(PrintOps(I, Ops)); DOUT << "\n";
+  DEBUG(errs() << "RAIn:\t"; PrintOps(I, Ops); errs() << "\n");
   
   // Now that we have linearized the tree to a list and have gathered all of
   // the operands and their ranks, sort the operands by their rank.  Use a
@@ -853,7 +856,7 @@ void Reassociate::ReassociateExpression(BinaryOperator *I) {
   if (Value *V = OptimizeExpression(I, Ops)) {
     // This expression tree simplified to something that isn't a tree,
     // eliminate it.
-    DOUT << "Reassoc to scalar: " << *V << "\n";
+    DEBUG(errs() << "Reassoc to scalar: " << *V << "\n");
     I->replaceAllUsesWith(V);
     RemoveDeadBinaryOp(I);
     return;
@@ -871,7 +874,7 @@ void Reassociate::ReassociateExpression(BinaryOperator *I) {
     Ops.pop_back();
   }
   
-  DOUT << "RAOut:\t"; DEBUG(PrintOps(I, Ops)); DOUT << "\n";
+  DEBUG(errs() << "RAOut:\t"; PrintOps(I, Ops); errs() << "\n");
   
   if (Ops.size() == 1) {
     // This expression tree simplified to something that isn't a tree,
diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp
index ac95d25..99e1252 100644
--- a/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -26,7 +26,6 @@
 #include "llvm/BasicBlock.h"
 #include "llvm/Instructions.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/Support/Compiler.h"
 #include "llvm/Support/CFG.h"
 #include <list>
 using namespace llvm;
@@ -35,7 +34,7 @@ STATISTIC(NumRegsDemoted, "Number of registers demoted");
 STATISTIC(NumPhisDemoted, "Number of phi-nodes demoted");
 
 namespace {
-  struct VISIBILITY_HIDDEN RegToMem : public FunctionPass {
+  struct RegToMem : public FunctionPass {
     static char ID; // Pass identification, replacement for typeid
     RegToMem() : FunctionPass(&ID) {}
 
@@ -44,73 +43,17 @@ namespace {
       AU.addPreservedID(BreakCriticalEdgesID);
     }
 
-   bool valueEscapes(Instruction* i) {
-      BasicBlock* bb = i->getParent();
-      for (Value::use_iterator ii = i->use_begin(), ie = i->use_end();
-           ii != ie; ++ii)
-        if (cast<Instruction>(*ii)->getParent() != bb ||
-            isa<PHINode>(*ii))
+   bool valueEscapes(const Instruction *Inst) const {
+     const BasicBlock *BB = Inst->getParent();
+      for (Value::use_const_iterator UI = Inst->use_begin(),E = Inst->use_end();
+           UI != E; ++UI)
+        if (cast<Instruction>(*UI)->getParent() != BB ||
+            isa<PHINode>(*UI))
           return true;
       return false;
     }
 
-    virtual bool runOnFunction(Function &F) {
-      if (!F.isDeclaration()) {
-        // Insert all new allocas into entry block.
-        BasicBlock* BBEntry = &F.getEntryBlock();
-        assert(pred_begin(BBEntry) == pred_end(BBEntry) &&
-               "Entry block to function must not have predecessors!");
-
-        // Find first non-alloca instruction and create insertion point. This is
-        // safe if block is well-formed: it always have terminator, otherwise
-        // we'll get and assertion.
-        BasicBlock::iterator I = BBEntry->begin();
-        while (isa<AllocaInst>(I)) ++I;
-
-        CastInst *AllocaInsertionPoint =
-          CastInst::Create(Instruction::BitCast,
-                           Context->getNullValue(Type::Int32Ty), Type::Int32Ty,
-                           "reg2mem alloca point", I);
-
-        // Find the escaped instructions. But don't create stack slots for
-        // allocas in entry block.
-        std::list<Instruction*> worklist;
-        for (Function::iterator ibb = F.begin(), ibe = F.end();
-             ibb != ibe; ++ibb)
-          for (BasicBlock::iterator iib = ibb->begin(), iie = ibb->end();
-               iib != iie; ++iib) {
-            if (!(isa<AllocaInst>(iib) && iib->getParent() == BBEntry) &&
-                valueEscapes(iib)) {
-              worklist.push_front(&*iib);
-            }
-          }
-
-        // Demote escaped instructions
-        NumRegsDemoted += worklist.size();
-        for (std::list<Instruction*>::iterator ilb = worklist.begin(), 
-               ile = worklist.end(); ilb != ile; ++ilb)
-          DemoteRegToStack(**ilb, false, AllocaInsertionPoint);
-
-        worklist.clear();
-
-        // Find all phi's
-        for (Function::iterator ibb = F.begin(), ibe = F.end();
-             ibb != ibe; ++ibb)
-          for (BasicBlock::iterator iib = ibb->begin(), iie = ibb->end();
-               iib != iie; ++iib)
-            if (isa<PHINode>(iib))
-              worklist.push_front(&*iib);
-
-        // Demote phi nodes
-        NumPhisDemoted += worklist.size();
-        for (std::list<Instruction*>::iterator ilb = worklist.begin(), 
-               ile = worklist.end(); ilb != ile; ++ilb)
-          DemotePHIToStack(cast<PHINode>(*ilb), AllocaInsertionPoint);
-
-        return true;
-      }
-      return false;
-    }
+    virtual bool runOnFunction(Function &F);
   };
 }
   
@@ -118,6 +61,66 @@ char RegToMem::ID = 0;
 static RegisterPass<RegToMem>
 X("reg2mem", "Demote all values to stack slots");
 
+
+bool RegToMem::runOnFunction(Function &F) {
+  if (F.isDeclaration()) 
+    return false;
+  
+  // Insert all new allocas into entry block.
+  BasicBlock *BBEntry = &F.getEntryBlock();
+  assert(pred_begin(BBEntry) == pred_end(BBEntry) &&
+         "Entry block to function must not have predecessors!");
+  
+  // Find first non-alloca instruction and create insertion point. This is
+  // safe if block is well-formed: it always have terminator, otherwise
+  // we'll get and assertion.
+  BasicBlock::iterator I = BBEntry->begin();
+  while (isa<AllocaInst>(I)) ++I;
+  
+  CastInst *AllocaInsertionPoint =
+    new BitCastInst(Constant::getNullValue(Type::getInt32Ty(F.getContext())),
+                    Type::getInt32Ty(F.getContext()),
+                    "reg2mem alloca point", I);
+  
+  // Find the escaped instructions. But don't create stack slots for
+  // allocas in entry block.
+  std::list<Instruction*> WorkList;
+  for (Function::iterator ibb = F.begin(), ibe = F.end();
+       ibb != ibe; ++ibb)
+    for (BasicBlock::iterator iib = ibb->begin(), iie = ibb->end();
+         iib != iie; ++iib) {
+      if (!(isa<AllocaInst>(iib) && iib->getParent() == BBEntry) &&
+          valueEscapes(iib)) {
+        WorkList.push_front(&*iib);
+      }
+    }
+  
+  // Demote escaped instructions
+  NumRegsDemoted += WorkList.size();
+  for (std::list<Instruction*>::iterator ilb = WorkList.begin(), 
+       ile = WorkList.end(); ilb != ile; ++ilb)
+    DemoteRegToStack(**ilb, false, AllocaInsertionPoint);
+  
+  WorkList.clear();
+  
+  // Find all phi's
+  for (Function::iterator ibb = F.begin(), ibe = F.end();
+       ibb != ibe; ++ibb)
+    for (BasicBlock::iterator iib = ibb->begin(), iie = ibb->end();
+         iib != iie; ++iib)
+      if (isa<PHINode>(iib))
+        WorkList.push_front(&*iib);
+  
+  // Demote phi nodes
+  NumPhisDemoted += WorkList.size();
+  for (std::list<Instruction*>::iterator ilb = WorkList.begin(), 
+       ile = WorkList.end(); ilb != ile; ++ilb)
+    DemotePHIToStack(cast<PHINode>(*ilb), AllocaInsertionPoint);
+  
+  return true;
+}
+
+
 // createDemoteRegisterToMemory - Provide an entry point to create this pass.
 //
 const PassInfo *const llvm::DemoteRegisterToMemoryID = &X;
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index f0bc127..b5edf4e 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -33,9 +33,10 @@
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Support/CallSite.h"
-#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/SmallSet.h"
@@ -58,7 +59,7 @@ namespace {
 /// LatticeVal class - This class represents the different lattice values that
 /// an LLVM value may occupy.  It is a simple class with value semantics.
 ///
-class VISIBILITY_HIDDEN LatticeVal {
+class LatticeVal {
   enum {
     /// undefined - This LLVM Value has no known value yet.
     undefined,
@@ -139,7 +140,7 @@ public:
 /// Constant Propagation.
 ///
 class SCCPSolver : public InstVisitor<SCCPSolver> {
-  LLVMContext* Context;
+  LLVMContext *Context;
   DenseSet<BasicBlock*> BBExecutable;// The basic blocks that are executable
   std::map<Value*, LatticeVal> ValueState;  // The state each value is in.
 
@@ -179,12 +180,12 @@ class SCCPSolver : public InstVisitor<SCCPSolver> {
   typedef std::pair<BasicBlock*, BasicBlock*> Edge;
   DenseSet<Edge> KnownFeasibleEdges;
 public:
-  void setContext(LLVMContext* C) { Context = C; }
+  void setContext(LLVMContext *C) { Context = C; }
 
   /// MarkBlockExecutable - This method can be used by clients to mark all of
   /// the blocks that are known to be intrinsically live in the processed unit.
   void MarkBlockExecutable(BasicBlock *BB) {
-    DOUT << "Marking Block Executable: " << BB->getNameStart() << "\n";
+    DEBUG(errs() << "Marking Block Executable: " << BB->getName() << "\n");
     BBExecutable.insert(BB);   // Basic block is executable!
     BBWorkList.push_back(BB);  // Add the block to the work list!
   }
@@ -260,14 +261,14 @@ private:
   //
   inline void markConstant(LatticeVal &IV, Value *V, Constant *C) {
     if (IV.markConstant(C)) {
-      DOUT << "markConstant: " << *C << ": " << *V;
+      DEBUG(errs() << "markConstant: " << *C << ": " << *V << '\n');
       InstWorkList.push_back(V);
     }
   }
   
   inline void markForcedConstant(LatticeVal &IV, Value *V, Constant *C) {
     IV.markForcedConstant(C);
-    DOUT << "markForcedConstant: " << *C << ": " << *V;
+    DEBUG(errs() << "markForcedConstant: " << *C << ": " << *V << '\n');
     InstWorkList.push_back(V);
   }
   
@@ -280,11 +281,11 @@ private:
   // work list so that the users of the instruction are updated later.
   inline void markOverdefined(LatticeVal &IV, Value *V) {
     if (IV.markOverdefined()) {
-      DEBUG(DOUT << "markOverdefined: ";
+      DEBUG(errs() << "markOverdefined: ";
             if (Function *F = dyn_cast<Function>(V))
-              DOUT << "Function '" << F->getName() << "'\n";
+              errs() << "Function '" << F->getName() << "'\n";
             else
-              DOUT << *V);
+              errs() << *V << '\n');
       // Only instructions go on the work list
       OverdefinedInstWorkList.push_back(V);
     }
@@ -337,8 +338,8 @@ private:
       return;  // This edge is already known to be executable!
 
     if (BBExecutable.count(Dest)) {
-      DOUT << "Marking Edge Executable: " << Source->getNameStart()
-           << " -> " << Dest->getNameStart() << "\n";
+      DEBUG(errs() << "Marking Edge Executable: " << Source->getName()
+            << " -> " << Dest->getName() << "\n");
 
       // The destination is already executable, but we just made an edge
       // feasible that wasn't before.  Revisit the PHI nodes in the block
@@ -399,7 +400,9 @@ private:
   void visitStoreInst     (Instruction &I);
   void visitLoadInst      (LoadInst &I);
   void visitGetElementPtrInst(GetElementPtrInst &I);
-  void visitCallInst      (CallInst &I) { visitCallSite(CallSite::get(&I)); }
+  void visitCallInst      (CallInst &I) { 
+    visitCallSite(CallSite::get(&I));
+  }
   void visitInvokeInst    (InvokeInst &II) {
     visitCallSite(CallSite::get(&II));
     visitTerminatorInst(II);
@@ -414,7 +417,7 @@ private:
 
   void visitInstruction(Instruction &I) {
     // If a new instruction is added to LLVM that we don't handle...
-    cerr << "SCCP: Don't know how to handle: " << I;
+    errs() << "SCCP: Don't know how to handle: " << I;
     markOverdefined(&I);   // Just in case
   }
 };
@@ -440,7 +443,7 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
         Succs[0] = Succs[1] = true;
       } else if (BCValue.isConstant()) {
         // Constant condition variables mean the branch can only go a single way
-        Succs[BCValue.getConstant() == Context->getConstantIntFalse()] = true;
+        Succs[BCValue.getConstant() == ConstantInt::getFalse(*Context)] = true;
       }
     }
   } else if (isa<InvokeInst>(&TI)) {
@@ -455,7 +458,7 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
     } else if (SCValue.isConstant())
       Succs[SI->findCaseValue(cast<ConstantInt>(SCValue.getConstant()))] = true;
   } else {
-    assert(0 && "SCCP: Don't know how to handle this terminator!");
+    llvm_unreachable("SCCP: Don't know how to handle this terminator!");
   }
 }
 
@@ -485,7 +488,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
 
         // Constant condition variables mean the branch can only go a single way
         return BI->getSuccessor(BCValue.getConstant() ==
-                                       Context->getConstantIntFalse()) == To;
+                                       ConstantInt::getFalse(*Context)) == To;
       }
       return false;
     }
@@ -513,8 +516,10 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
     }
     return false;
   } else {
-    cerr << "Unknown terminator instruction: " << *TI;
-    abort();
+#ifndef NDEBUG
+    errs() << "Unknown terminator instruction: " << *TI << '\n';
+#endif
+    llvm_unreachable(0);
   }
 }
 
@@ -642,7 +647,7 @@ void SCCPSolver::visitReturnInst(ReturnInst &I) {
       DenseMap<std::pair<Function*, unsigned>, LatticeVal>::iterator
         It = TrackedMultipleRetVals.find(std::make_pair(F, i));
       if (It == TrackedMultipleRetVals.end()) break;
-      if (Value *Val = FindInsertedValue(I.getOperand(0), i))
+      if (Value *Val = FindInsertedValue(I.getOperand(0), i, I.getContext()))
         mergeInValue(It->second, F, getValueState(Val));
     }
   }
@@ -666,7 +671,7 @@ void SCCPSolver::visitCastInst(CastInst &I) {
   if (VState.isOverdefined())          // Inherit overdefinedness of operand
     markOverdefined(&I);
   else if (VState.isConstant())        // Propagate constant value
-    markConstant(&I, Context->getConstantExprCast(I.getOpcode(), 
+    markConstant(&I, ConstantExpr::getCast(I.getOpcode(), 
                                            VState.getConstant(), I.getType()));
 }
 
@@ -809,12 +814,12 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) {
         if (NonOverdefVal->isUndefined()) {
           // Could annihilate value.
           if (I.getOpcode() == Instruction::And)
-            markConstant(IV, &I, Context->getNullValue(I.getType()));
+            markConstant(IV, &I, Constant::getNullValue(I.getType()));
           else if (const VectorType *PT = dyn_cast<VectorType>(I.getType()))
-            markConstant(IV, &I, Context->getConstantVectorAllOnesValue(PT));
+            markConstant(IV, &I, Constant::getAllOnesValue(PT));
           else
             markConstant(IV, &I,
-                         Context->getConstantIntAllOnesValue(I.getType()));
+                         Constant::getAllOnesValue(I.getType()));
           return;
         } else {
           if (I.getOpcode() == Instruction::And) {
@@ -859,7 +864,7 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) {
               break;  // Cannot fold this operation over the PHI nodes!
             } else if (In1.isConstant() && In2.isConstant()) {
               Constant *V =
-                     Context->getConstantExpr(I.getOpcode(), In1.getConstant(),
+                     ConstantExpr::get(I.getOpcode(), In1.getConstant(),
                                               In2.getConstant());
               if (Result.isUndefined())
                 Result.markConstant(V);
@@ -908,7 +913,7 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) {
     markOverdefined(IV, &I);
   } else if (V1State.isConstant() && V2State.isConstant()) {
     markConstant(IV, &I,
-                Context->getConstantExpr(I.getOpcode(), V1State.getConstant(),
+                ConstantExpr::get(I.getOpcode(), V1State.getConstant(),
                                            V2State.getConstant()));
   }
 }
@@ -945,7 +950,7 @@ void SCCPSolver::visitCmpInst(CmpInst &I) {
               Result.markOverdefined();
               break;  // Cannot fold this operation over the PHI nodes!
             } else if (In1.isConstant() && In2.isConstant()) {
-              Constant *V = Context->getConstantExprCompare(I.getPredicate(), 
+              Constant *V = ConstantExpr::getCompare(I.getPredicate(), 
                                                      In1.getConstant(), 
                                                      In2.getConstant());
               if (Result.isUndefined())
@@ -994,7 +999,7 @@ void SCCPSolver::visitCmpInst(CmpInst &I) {
 
     markOverdefined(IV, &I);
   } else if (V1State.isConstant() && V2State.isConstant()) {
-    markConstant(IV, &I, Context->getConstantExprCompare(I.getPredicate(), 
+    markConstant(IV, &I, ConstantExpr::getCompare(I.getPredicate(), 
                                                   V1State.getConstant(), 
                                                   V2State.getConstant()));
   }
@@ -1096,7 +1101,7 @@ void SCCPSolver::visitGetElementPtrInst(GetElementPtrInst &I) {
   Constant *Ptr = Operands[0];
   Operands.erase(Operands.begin());  // Erase the pointer from idx list...
 
-  markConstant(IV, &I, Context->getConstantExprGetElementPtr(Ptr, &Operands[0],
+  markConstant(IV, &I, ConstantExpr::getGetElementPtr(Ptr, &Operands[0],
                                                       Operands.size()));
 }
 
@@ -1127,10 +1132,9 @@ void SCCPSolver::visitLoadInst(LoadInst &I) {
   if (PtrVal.isConstant() && !I.isVolatile()) {
     Value *Ptr = PtrVal.getConstant();
     // TODO: Consider a target hook for valid address spaces for this xform.
-    if (isa<ConstantPointerNull>(Ptr) && 
-        cast<PointerType>(Ptr->getType())->getAddressSpace() == 0) {
+    if (isa<ConstantPointerNull>(Ptr) && I.getPointerAddressSpace() == 0) {
       // load null -> null
-      markConstant(IV, &I, Context->getNullValue(I.getType()));
+      markConstant(IV, &I, Constant::getNullValue(I.getType()));
       return;
     }
 
@@ -1179,7 +1183,7 @@ void SCCPSolver::visitCallSite(CallSite CS) {
   if (F == 0 || !F->hasLocalLinkage()) {
 CallOverdefined:
     // Void return and not tracking callee, just bail.
-    if (I->getType() == Type::VoidTy) return;
+    if (I->getType()->isVoidTy()) return;
     
     // Otherwise, if we have a single return value case, and if the function is
     // a declaration, maybe we can constant fold it.
@@ -1258,6 +1262,10 @@ CallOverdefined:
   for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
        AI != E; ++AI, ++CAI) {
     LatticeVal &IV = ValueState[AI];
+    if (AI->hasByValAttr() && !F->onlyReadsMemory()) {
+      IV.markOverdefined();
+      continue;
+    }
     if (!IV.isOverdefined())
       mergeInValue(IV, AI, getValueState(*CAI));
   }
@@ -1273,7 +1281,7 @@ void SCCPSolver::Solve() {
       Value *I = OverdefinedInstWorkList.back();
       OverdefinedInstWorkList.pop_back();
 
-      DOUT << "\nPopped off OI-WL: " << *I;
+      DEBUG(errs() << "\nPopped off OI-WL: " << *I << '\n');
 
       // "I" got into the work list because it either made the transition from
       // bottom to constant
@@ -1291,7 +1299,7 @@ void SCCPSolver::Solve() {
       Value *I = InstWorkList.back();
       InstWorkList.pop_back();
 
-      DOUT << "\nPopped off I-WL: " << *I;
+      DEBUG(errs() << "\nPopped off I-WL: " << *I << '\n');
 
       // "I" got into the work list because it either made the transition from
       // bottom to constant
@@ -1311,7 +1319,7 @@ void SCCPSolver::Solve() {
       BasicBlock *BB = BBWorkList.back();
       BBWorkList.pop_back();
 
-      DOUT << "\nPopped off BBWL: " << *BB;
+      DEBUG(errs() << "\nPopped off BBWL: " << *BB << '\n');
 
       // Notify all instructions in this basic block that they are newly
       // executable.
@@ -1345,7 +1353,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
     
     for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
       // Look for instructions which produce undef values.
-      if (I->getType() == Type::VoidTy) continue;
+      if (I->getType()->isVoidTy()) continue;
       
       LatticeVal &LV = getValueState(I);
       if (!LV.isUndefined()) continue;
@@ -1371,22 +1379,22 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
         // to be handled here, because we don't know whether the top part is 1's
         // or 0's.
         assert(Op0LV.isUndefined());
-        markForcedConstant(LV, I, Context->getNullValue(ITy));
+        markForcedConstant(LV, I, Constant::getNullValue(ITy));
         return true;
       case Instruction::Mul:
       case Instruction::And:
         // undef * X -> 0.   X could be zero.
         // undef & X -> 0.   X could be zero.
-        markForcedConstant(LV, I, Context->getNullValue(ITy));
+        markForcedConstant(LV, I, Constant::getNullValue(ITy));
         return true;
 
       case Instruction::Or:
         // undef | X -> -1.   X could be -1.
         if (const VectorType *PTy = dyn_cast<VectorType>(ITy))
           markForcedConstant(LV, I,
-                             Context->getConstantVectorAllOnesValue(PTy));
+                             Constant::getAllOnesValue(PTy));
         else          
-          markForcedConstant(LV, I, Context->getConstantIntAllOnesValue(ITy));
+          markForcedConstant(LV, I, Constant::getAllOnesValue(ITy));
         return true;
 
       case Instruction::SDiv:
@@ -1399,7 +1407,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
         
         // undef / X -> 0.   X could be maxint.
         // undef % X -> 0.   X could be 1.
-        markForcedConstant(LV, I, Context->getNullValue(ITy));
+        markForcedConstant(LV, I, Constant::getNullValue(ITy));
         return true;
         
       case Instruction::AShr:
@@ -1420,7 +1428,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
         
         // X >> undef -> 0.  X could be 0.
         // X << undef -> 0.  X could be 0.
-        markForcedConstant(LV, I, Context->getNullValue(ITy));
+        markForcedConstant(LV, I, Constant::getNullValue(ITy));
         return true;
       case Instruction::Select:
         // undef ? X : Y  -> X or Y.  There could be commonality between X/Y.
@@ -1483,7 +1491,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
     // as undef, then further analysis could think the undef went another way
     // leading to an inconsistent set of conclusions.
     if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
-      BI->setCondition(Context->getConstantIntFalse());
+      BI->setCondition(ConstantInt::getFalse(*Context));
     } else {
       SwitchInst *SI = cast<SwitchInst>(TI);
       SI->setCondition(SI->getCaseValue(1));
@@ -1502,7 +1510,7 @@ namespace {
   /// SCCP Class - This class uses the SCCPSolver to implement a per-function
   /// Sparse Conditional Constant Propagator.
   ///
-  struct VISIBILITY_HIDDEN SCCP : public FunctionPass {
+  struct SCCP : public FunctionPass {
     static char ID; // Pass identification, replacement for typeid
     SCCP() : FunctionPass(&ID) {}
 
@@ -1531,9 +1539,9 @@ FunctionPass *llvm::createSCCPPass() {
 // and return true if the function was modified.
 //
 bool SCCP::runOnFunction(Function &F) {
-  DOUT << "SCCP on function '" << F.getNameStart() << "'\n";
+  DEBUG(errs() << "SCCP on function '" << F.getName() << "'\n");
   SCCPSolver Solver;
-  Solver.setContext(Context);
+  Solver.setContext(&F.getContext());
 
   // Mark the first block of the function as being executable.
   Solver.MarkBlockExecutable(F.begin());
@@ -1546,7 +1554,7 @@ bool SCCP::runOnFunction(Function &F) {
   bool ResolvedUndefs = true;
   while (ResolvedUndefs) {
     Solver.Solve();
-    DOUT << "RESOLVING UNDEFs\n";
+    DEBUG(errs() << "RESOLVING UNDEFs\n");
     ResolvedUndefs = Solver.ResolvedUndefsIn(F);
   }
 
@@ -1561,7 +1569,7 @@ bool SCCP::runOnFunction(Function &F) {
 
   for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
     if (!Solver.isBlockExecutable(BB)) {
-      DOUT << "  BasicBlock Dead:" << *BB;
+      DEBUG(errs() << "  BasicBlock Dead:" << *BB);
       ++NumDeadBlocks;
 
       // Delete the instructions backwards, as it has a reduced likelihood of
@@ -1573,7 +1581,7 @@ bool SCCP::runOnFunction(Function &F) {
         Instruction *I = Insts.back();
         Insts.pop_back();
         if (!I->use_empty())
-          I->replaceAllUsesWith(Context->getUndef(I->getType()));
+          I->replaceAllUsesWith(UndefValue::get(I->getType()));
         BB->getInstList().erase(I);
         MadeChanges = true;
         ++NumInstRemoved;
@@ -1584,8 +1592,7 @@ bool SCCP::runOnFunction(Function &F) {
       //
       for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) {
         Instruction *Inst = BI++;
-        if (Inst->getType() == Type::VoidTy ||
-            isa<TerminatorInst>(Inst))
+        if (Inst->getType()->isVoidTy() || isa<TerminatorInst>(Inst))
           continue;
         
         LatticeVal &IV = Values[Inst];
@@ -1593,8 +1600,8 @@ bool SCCP::runOnFunction(Function &F) {
           continue;
         
         Constant *Const = IV.isConstant()
-          ? IV.getConstant() : Context->getUndef(Inst->getType());
-        DOUT << "  Constant: " << *Const << " = " << *Inst;
+          ? IV.getConstant() : UndefValue::get(Inst->getType());
+        DEBUG(errs() << "  Constant: " << *Const << " = " << *Inst);
 
         // Replaces all of the uses of a variable with uses of the constant.
         Inst->replaceAllUsesWith(Const);
@@ -1617,7 +1624,7 @@ namespace {
   /// IPSCCP Class - This class implements interprocedural Sparse Conditional
   /// Constant Propagation.
   ///
-  struct VISIBILITY_HIDDEN IPSCCP : public ModulePass {
+  struct IPSCCP : public ModulePass {
     static char ID;
     IPSCCP() : ModulePass(&ID) {}
     bool runOnModule(Module &M);
@@ -1658,7 +1665,10 @@ static bool AddressIsTaken(GlobalValue *GV) {
 }
 
 bool IPSCCP::runOnModule(Module &M) {
+  LLVMContext *Context = &M.getContext();
+  
   SCCPSolver Solver;
+  Solver.setContext(Context);
 
   // Loop over all functions, marking arguments to those with their addresses
   // taken or that are external as overdefined.
@@ -1687,7 +1697,7 @@ bool IPSCCP::runOnModule(Module &M) {
   while (ResolvedUndefs) {
     Solver.Solve();
 
-    DOUT << "RESOLVING UNDEFS\n";
+    DEBUG(errs() << "RESOLVING UNDEFS\n");
     ResolvedUndefs = false;
     for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
       ResolvedUndefs |= Solver.ResolvedUndefsIn(*F);
@@ -1709,8 +1719,8 @@ bool IPSCCP::runOnModule(Module &M) {
         LatticeVal &IV = Values[AI];
         if (IV.isConstant() || IV.isUndefined()) {
           Constant *CST = IV.isConstant() ?
-            IV.getConstant() : Context->getUndef(AI->getType());
-          DOUT << "***  Arg " << *AI << " = " << *CST <<"\n";
+            IV.getConstant() : UndefValue::get(AI->getType());
+          DEBUG(errs() << "***  Arg " << *AI << " = " << *CST <<"\n");
 
           // Replaces all of the uses of a variable with uses of the
           // constant.
@@ -1721,7 +1731,7 @@ bool IPSCCP::runOnModule(Module &M) {
 
     for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
       if (!Solver.isBlockExecutable(BB)) {
-        DOUT << "  BasicBlock Dead:" << *BB;
+        DEBUG(errs() << "  BasicBlock Dead:" << *BB);
         ++IPNumDeadBlocks;
 
         // Delete the instructions backwards, as it has a reduced likelihood of
@@ -1734,7 +1744,7 @@ bool IPSCCP::runOnModule(Module &M) {
           Instruction *I = Insts.back();
           Insts.pop_back();
           if (!I->use_empty())
-            I->replaceAllUsesWith(Context->getUndef(I->getType()));
+            I->replaceAllUsesWith(UndefValue::get(I->getType()));
           BB->getInstList().erase(I);
           MadeChanges = true;
           ++IPNumInstRemoved;
@@ -1746,18 +1756,18 @@ bool IPSCCP::runOnModule(Module &M) {
             TI->getSuccessor(i)->removePredecessor(BB);
         }
         if (!TI->use_empty())
-          TI->replaceAllUsesWith(Context->getUndef(TI->getType()));
+          TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
         BB->getInstList().erase(TI);
 
         if (&*BB != &F->front())
           BlocksToErase.push_back(BB);
         else
-          new UnreachableInst(BB);
+          new UnreachableInst(M.getContext(), BB);
 
       } else {
         for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) {
           Instruction *Inst = BI++;
-          if (Inst->getType() == Type::VoidTy)
+          if (Inst->getType()->isVoidTy())
             continue;
           
           LatticeVal &IV = Values[Inst];
@@ -1765,8 +1775,8 @@ bool IPSCCP::runOnModule(Module &M) {
             continue;
           
           Constant *Const = IV.isConstant()
-            ? IV.getConstant() : Context->getUndef(Inst->getType());
-          DOUT << "  Constant: " << *Const << " = " << *Inst;
+            ? IV.getConstant() : UndefValue::get(Inst->getType());
+          DEBUG(errs() << "  Constant: " << *Const << " = " << *Inst);
 
           // Replaces all of the uses of a variable with uses of the
           // constant.
@@ -1802,7 +1812,7 @@ bool IPSCCP::runOnModule(Module &M) {
           } else if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
             assert(isa<UndefValue>(SI->getCondition()) && "Switch should fold");
           } else {
-            assert(0 && "Didn't fold away reference to block!");
+            llvm_unreachable("Didn't fold away reference to block!");
           }
 #endif
           
@@ -1834,12 +1844,12 @@ bool IPSCCP::runOnModule(Module &M) {
   for (DenseMap<Function*, LatticeVal>::const_iterator I = RV.begin(),
          E = RV.end(); I != E; ++I)
     if (!I->second.isOverdefined() &&
-        I->first->getReturnType() != Type::VoidTy) {
+        !I->first->getReturnType()->isVoidTy()) {
       Function *F = I->first;
       for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
         if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()))
           if (!isa<UndefValue>(RI->getOperand(0)))
-            RI->setOperand(0, Context->getUndef(F->getReturnType()));
+            RI->setOperand(0, UndefValue::get(F->getReturnType()));
     }
 
   // If we infered constant or undef values for globals variables, we can delete
@@ -1850,7 +1860,7 @@ bool IPSCCP::runOnModule(Module &M) {
     GlobalVariable *GV = I->first;
     assert(!I->second.isOverdefined() &&
            "Overdefined values should have been taken out of the map!");
-    DOUT << "Found that GV '" << GV->getNameStart() << "' is constant!\n";
+    DEBUG(errs() << "Found that GV '" << GV->getName() << "' is constant!\n");
     while (!GV->use_empty()) {
       StoreInst *SI = cast<StoreInst>(GV->use_back());
       SI->eraseFromParent();
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 109fb90..610d874 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -34,13 +34,13 @@
 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/IRBuilder.h"
 #include "llvm/Support/MathExtras.h"
-#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringExtras.h"
 using namespace llvm;
 
 STATISTIC(NumReplaced,  "Number of allocas broken up");
@@ -49,7 +49,7 @@ STATISTIC(NumConverted, "Number of aggregates converted to scalar");
 STATISTIC(NumGlobals,   "Number of allocas copied from constant global");
 
 namespace {
-  struct VISIBILITY_HIDDEN SROA : public FunctionPass {
+  struct SROA : public FunctionPass {
     static char ID; // Pass identification, replacement for typeid
     explicit SROA(signed T = -1) : FunctionPass(&ID) {
       if (T == -1)
@@ -68,7 +68,6 @@ namespace {
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.addRequired<DominatorTree>();
       AU.addRequired<DominanceFrontier>();
-      AU.addRequired<TargetData>();
       AU.setPreservesCFG();
     }
 
@@ -150,9 +149,16 @@ FunctionPass *llvm::createScalarReplAggregatesPass(signed int Threshold) {
 
 
 bool SROA::runOnFunction(Function &F) {
-  TD = &getAnalysis<TargetData>();
-  
+  TD = getAnalysisIfAvailable<TargetData>();
+
   bool Changed = performPromotion(F);
+
+  // FIXME: ScalarRepl currently depends on TargetData more than it
+  // theoretically needs to. It should be refactored in order to support
+  // target-independent IR. Until this is done, just skip the actual
+  // scalar-replacement portion of this pass.
+  if (!TD) return Changed;
+
   while (1) {
     bool LocalChange = performScalarRepl(F);
     if (!LocalChange) break;   // No need to repromote if no scalarrepl
@@ -186,7 +192,7 @@ bool SROA::performPromotion(Function &F) {
 
     if (Allocas.empty()) break;
 
-    PromoteMemToReg(Allocas, DT, DF);
+    PromoteMemToReg(Allocas, DT, DF, F.getContext());
     NumPromoted += Allocas.size();
     Changed = true;
   }
@@ -238,11 +244,10 @@ bool SROA::performScalarRepl(Function &F) {
     // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
     // is only subsequently read.
     if (Instruction *TheCopy = isOnlyCopiedFromConstantGlobal(AI)) {
-      DOUT << "Found alloca equal to global: " << *AI;
-      DOUT << "  memcpy = " << *TheCopy;
+      DEBUG(errs() << "Found alloca equal to global: " << *AI << '\n');
+      DEBUG(errs() << "  memcpy = " << *TheCopy << '\n');
       Constant *TheSrc = cast<Constant>(TheCopy->getOperand(2));
-      AI->replaceAllUsesWith(
-                        Context->getConstantExprBitCast(TheSrc, AI->getType()));
+      AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType()));
       TheCopy->eraseFromParent();  // Don't mutate the global.
       AI->eraseFromParent();
       ++NumGlobals;
@@ -256,9 +261,12 @@ bool SROA::performScalarRepl(Function &F) {
     // value cannot be decomposed at all.
     uint64_t AllocaSize = TD->getTypeAllocSize(AI->getAllocatedType());
 
+    // Do not promote [0 x %struct].
+    if (AllocaSize == 0) continue;
+
     // Do not promote any struct whose size is too big.
     if (AllocaSize > SRThreshold) continue;
-        
+
     if ((isa<StructType>(AI->getAllocatedType()) ||
          isa<ArrayType>(AI->getAllocatedType())) &&
         // Do not promote any struct into more than "32" separate vars.
@@ -266,7 +274,7 @@ bool SROA::performScalarRepl(Function &F) {
       // Check that all of the users of the allocation are capable of being
       // transformed.
       switch (isSafeAllocaToScalarRepl(AI)) {
-      default: assert(0 && "Unexpected value!");
+      default: llvm_unreachable("Unexpected value!");
       case 0:  // Not safe to scalar replace.
         break;
       case 1:  // Safe, but requires cleanup/canonicalizations first
@@ -298,16 +306,17 @@ bool SROA::performScalarRepl(Function &F) {
       // we just get a lot of insert/extracts.  If at least one vector is
       // involved, then we probably really do have a union of vector/array.
       if (VectorTy && isa<VectorType>(VectorTy) && HadAVector) {
-        DOUT << "CONVERT TO VECTOR: " << *AI << "  TYPE = " << *VectorTy <<"\n";
+        DEBUG(errs() << "CONVERT TO VECTOR: " << *AI << "\n  TYPE = "
+                     << *VectorTy << '\n');
         
         // Create and insert the vector alloca.
-        NewAI = new AllocaInst(VectorTy, 0, "", AI->getParent()->begin());
+        NewAI = new AllocaInst(VectorTy, 0, "",  AI->getParent()->begin());
         ConvertUsesToScalar(AI, NewAI, 0);
       } else {
-        DOUT << "CONVERT TO SCALAR INTEGER: " << *AI << "\n";
+        DEBUG(errs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");
         
         // Create and insert the integer alloca.
-        const Type *NewTy = Context->getIntegerType(AllocaSize*8);
+        const Type *NewTy = IntegerType::get(AI->getContext(), AllocaSize*8);
         NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin());
         ConvertUsesToScalar(AI, NewAI, 0);
       }
@@ -328,14 +337,14 @@ bool SROA::performScalarRepl(Function &F) {
 /// predicate, do SROA now.
 void SROA::DoScalarReplacement(AllocationInst *AI, 
                                std::vector<AllocationInst*> &WorkList) {
-  DOUT << "Found inst to SROA: " << *AI;
+  DEBUG(errs() << "Found inst to SROA: " << *AI << '\n');
   SmallVector<AllocaInst*, 32> ElementAllocas;
   if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
     ElementAllocas.reserve(ST->getNumContainedTypes());
     for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) {
       AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0, 
                                       AI->getAlignment(),
-                                      AI->getName() + "." + utostr(i), AI);
+                                      AI->getName() + "." + Twine(i), AI);
       ElementAllocas.push_back(NA);
       WorkList.push_back(NA);  // Add to worklist for recursive processing
     }
@@ -345,7 +354,7 @@ void SROA::DoScalarReplacement(AllocationInst *AI,
     const Type *ElTy = AT->getElementType();
     for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
       AllocaInst *NA = new AllocaInst(ElTy, 0, AI->getAlignment(),
-                                      AI->getName() + "." + utostr(i), AI);
+                                      AI->getName() + "." + Twine(i), AI);
       ElementAllocas.push_back(NA);
       WorkList.push_back(NA);  // Add to worklist for recursive processing
     }
@@ -371,7 +380,7 @@ void SROA::DoScalarReplacement(AllocationInst *AI,
     //   %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1 
     // (Also works for arrays instead of structs)
     if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
-      Value *Insert = Context->getUndef(LI->getType());
+      Value *Insert = UndefValue::get(LI->getType());
       for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) {
         Value *Load = new LoadInst(ElementAllocas[i], "load", LI);
         Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI);
@@ -418,7 +427,8 @@ void SROA::DoScalarReplacement(AllocationInst *AI,
       // expanded itself once the worklist is rerun.
       //
       SmallVector<Value*, 8> NewArgs;
-      NewArgs.push_back(Context->getNullValue(Type::Int32Ty));
+      NewArgs.push_back(Constant::getNullValue(
+                                           Type::getInt32Ty(AI->getContext())));
       NewArgs.append(GEPI->op_begin()+3, GEPI->op_end());
       RepValue = GetElementPtrInst::Create(AllocaToUse, NewArgs.begin(),
                                            NewArgs.end(), "", GEPI);
@@ -478,7 +488,7 @@ void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocationInst *AI,
         if (Info.isUnsafe) return;
         break;
       }
-      DOUT << "  Transformation preventing inst: " << *User;
+      DEBUG(errs() << "  Transformation preventing inst: " << *User << '\n');
       return MarkUnsafe(Info);
     case Instruction::Call:
       if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
@@ -488,10 +498,10 @@ void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocationInst *AI,
           break;
         }
       }
-      DOUT << "  Transformation preventing inst: " << *User;
+      DEBUG(errs() << "  Transformation preventing inst: " << *User << '\n');
       return MarkUnsafe(Info);
     default:
-      DOUT << "  Transformation preventing inst: " << *User;
+      DEBUG(errs() << "  Transformation preventing inst: " << *User << '\n');
       return MarkUnsafe(Info);
     }
   }
@@ -531,7 +541,7 @@ void SROA::isSafeUseOfAllocation(Instruction *User, AllocationInst *AI,
 
   // The GEP is not safe to transform if not of the form "GEP <ptr>, 0, <cst>".
   if (I == E ||
-      I.getOperand() != Context->getNullValue(I.getOperand()->getType())) {
+      I.getOperand() != Constant::getNullValue(I.getOperand()->getType())) {
     return MarkUnsafe(Info);
   }
 
@@ -727,6 +737,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
   // that doesn't have anything to do with the alloca that we are promoting. For
   // memset, this Value* stays null.
   Value *OtherPtr = 0;
+  LLVMContext &Context = MI->getContext();
   unsigned MemAlignment = MI->getAlignment();
   if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { // memmove/memcopy
     if (BCInst == MTI->getRawDest())
@@ -764,7 +775,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
   const Type *BytePtrTy = MI->getRawDest()->getType();
   bool SROADest = MI->getRawDest() == BCInst;
   
-  Constant *Zero = Context->getNullValue(Type::Int32Ty);
+  Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext()));
 
   for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
     // If this is a memcpy/memmove, emit a GEP of the other element address.
@@ -772,9 +783,10 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
     unsigned OtherEltAlign = MemAlignment;
     
     if (OtherPtr) {
-      Value *Idx[2] = { Zero, Context->getConstantInt(Type::Int32Ty, i) };
+      Value *Idx[2] = { Zero,
+                      ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) };
       OtherElt = GetElementPtrInst::Create(OtherPtr, Idx, Idx + 2,
-                                           OtherPtr->getNameStr()+"."+utostr(i),
+                                           OtherPtr->getNameStr()+"."+Twine(i),
                                            MI);
       uint64_t EltOffset;
       const PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType());
@@ -819,7 +831,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
       Constant *StoreVal;
       if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getOperand(2))) {
         if (CI->isZero()) {
-          StoreVal = Context->getNullValue(EltTy);  // 0.0, null, 0, <0,0>
+          StoreVal = Constant::getNullValue(EltTy);  // 0.0, null, 0, <0,0>
         } else {
           // If EltTy is a vector type, get the element type.
           const Type *ValTy = EltTy->getScalarType();
@@ -835,18 +847,18 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
           }
           
           // Convert the integer value to the appropriate type.
-          StoreVal = Context->getConstantInt(TotalVal);
+          StoreVal = ConstantInt::get(Context, TotalVal);
           if (isa<PointerType>(ValTy))
-            StoreVal = Context->getConstantExprIntToPtr(StoreVal, ValTy);
+            StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy);
           else if (ValTy->isFloatingPoint())
-            StoreVal = Context->getConstantExprBitCast(StoreVal, ValTy);
+            StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy);
           assert(StoreVal->getType() == ValTy && "Type mismatch!");
           
           // If the requested value was a vector constant, create it.
           if (EltTy != ValTy) {
             unsigned NumElts = cast<VectorType>(ValTy)->getNumElements();
             SmallVector<Constant*, 16> Elts(NumElts, StoreVal);
-            StoreVal = Context->getConstantVector(&Elts[0], NumElts);
+            StoreVal = ConstantVector::get(&Elts[0], NumElts);
           }
         }
         new StoreInst(StoreVal, EltPtr, MI);
@@ -872,15 +884,16 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
       Value *Ops[] = {
         SROADest ? EltPtr : OtherElt,  // Dest ptr
         SROADest ? OtherElt : EltPtr,  // Src ptr
-        Context->getConstantInt(MI->getOperand(3)->getType(), EltSize), // Size
-        Context->getConstantInt(Type::Int32Ty, OtherEltAlign)  // Align
+        ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size
+        // Align
+        ConstantInt::get(Type::getInt32Ty(MI->getContext()), OtherEltAlign)
       };
       CallInst::Create(TheFn, Ops, Ops + 4, "", MI);
     } else {
       assert(isa<MemSetInst>(MI));
       Value *Ops[] = {
         EltPtr, MI->getOperand(2),  // Dest, Value,
-        Context->getConstantInt(MI->getOperand(3)->getType(), EltSize), // Size
+        ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size
         Zero  // Align
       };
       CallInst::Create(TheFn, Ops, Ops + 4, "", MI);
@@ -910,9 +923,11 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI,
   // Handle tail padding by extending the operand
   if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits)
     SrcVal = new ZExtInst(SrcVal,
-                          Context->getIntegerType(AllocaSizeBits), "", SI);
+                          IntegerType::get(SI->getContext(), AllocaSizeBits), 
+                          "", SI);
 
-  DOUT << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << *SI;
+  DEBUG(errs() << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << '\n' << *SI
+               << '\n');
 
   // There are two forms here: AI could be an array or struct.  Both cases
   // have different ways to compute the element offset.
@@ -929,7 +944,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI,
       
       Value *EltVal = SrcVal;
       if (Shift) {
-        Value *ShiftVal = Context->getConstantInt(EltVal->getType(), Shift);
+        Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
         EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal,
                                             "sroa.store.elt", SI);
       }
@@ -942,7 +957,8 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI,
       
       if (FieldSizeBits != AllocaSizeBits)
         EltVal = new TruncInst(EltVal,
-                               Context->getIntegerType(FieldSizeBits), "", SI);
+                             IntegerType::get(SI->getContext(), FieldSizeBits),
+                              "", SI);
       Value *DestField = NewElts[i];
       if (EltVal->getType() == FieldTy) {
         // Storing to an integer field of this size, just do it.
@@ -952,7 +968,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI,
       } else {
         // Otherwise, bitcast the dest pointer (for aggregates).
         DestField = new BitCastInst(DestField,
-                              Context->getPointerTypeUnqual(EltVal->getType()),
+                              PointerType::getUnqual(EltVal->getType()),
                                     "", SI);
       }
       new StoreInst(EltVal, DestField, SI);
@@ -977,7 +993,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI,
       
       Value *EltVal = SrcVal;
       if (Shift) {
-        Value *ShiftVal = Context->getConstantInt(EltVal->getType(), Shift);
+        Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
         EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal,
                                             "sroa.store.elt", SI);
       }
@@ -985,7 +1001,8 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI,
       // Truncate down to an integer of the right size.
       if (ElementSizeBits != AllocaSizeBits)
         EltVal = new TruncInst(EltVal, 
-                               Context->getIntegerType(ElementSizeBits),"",SI);
+                               IntegerType::get(SI->getContext(), 
+                                                ElementSizeBits),"",SI);
       Value *DestField = NewElts[i];
       if (EltVal->getType() == ArrayEltTy) {
         // Storing to an integer field of this size, just do it.
@@ -995,7 +1012,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI,
       } else {
         // Otherwise, bitcast the dest pointer (for aggregates).
         DestField = new BitCastInst(DestField,
-                              Context->getPointerTypeUnqual(EltVal->getType()),
+                              PointerType::getUnqual(EltVal->getType()),
                                     "", SI);
       }
       new StoreInst(EltVal, DestField, SI);
@@ -1026,7 +1043,8 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI,
       TD->getTypeAllocSizeInBits(LI->getType()) != AllocaSizeBits)
     return;
   
-  DOUT << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << *LI;
+  DEBUG(errs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI
+               << '\n');
   
   // There are two forms here: AI could be an array or struct.  Both cases
   // have different ways to compute the element offset.
@@ -1038,9 +1056,9 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI,
     const Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType();
     ArrayEltBitOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);
   }    
-    
-  Value *ResultVal =
-                 Context->getNullValue(Context->getIntegerType(AllocaSizeBits));
+  
+  Value *ResultVal = 
+    Constant::getNullValue(IntegerType::get(LI->getContext(), AllocaSizeBits));
   
   for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
     // Load the value from the alloca.  If the NewElt is an aggregate, cast
@@ -1053,11 +1071,12 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI,
     // Ignore zero sized fields like {}, they obviously contain no data.
     if (FieldSizeBits == 0) continue;
     
-    const IntegerType *FieldIntTy = Context->getIntegerType(FieldSizeBits);
+    const IntegerType *FieldIntTy = IntegerType::get(LI->getContext(), 
+                                                     FieldSizeBits);
     if (!isa<IntegerType>(FieldTy) && !FieldTy->isFloatingPoint() &&
         !isa<VectorType>(FieldTy))
       SrcField = new BitCastInst(SrcField,
-                                 Context->getPointerTypeUnqual(FieldIntTy),
+                                 PointerType::getUnqual(FieldIntTy),
                                  "", LI);
     SrcField = new LoadInst(SrcField, "sroa.load.elt", LI);
 
@@ -1082,7 +1101,7 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI,
       Shift = AllocaSizeBits-Shift-FieldIntTy->getBitWidth();
     
     if (Shift) {
-      Value *ShiftVal = Context->getConstantInt(SrcField->getType(), Shift);
+      Value *ShiftVal = ConstantInt::get(SrcField->getType(), Shift);
       SrcField = BinaryOperator::CreateShl(SrcField, ShiftVal, "", LI);
     }
 
@@ -1152,7 +1171,8 @@ int SROA::isSafeAllocaToScalarRepl(AllocationInst *AI) {
        I != E; ++I) {
     isSafeUseOfAllocation(cast<Instruction>(*I), AI, Info);
     if (Info.isUnsafe) {
-      DOUT << "Cannot transform: " << *AI << "  due to user: " << **I;
+      DEBUG(errs() << "Cannot transform: " << *AI << "\n  due to user: "
+                   << **I << '\n');
       return 0;
     }
   }
@@ -1186,24 +1206,25 @@ void SROA::CleanupGEP(GetElementPtrInst *GEPI) {
     return;
 
   if (NumElements == 1) {
-    GEPI->setOperand(2, Context->getNullValue(Type::Int32Ty));
+    GEPI->setOperand(2, 
+                  Constant::getNullValue(Type::getInt32Ty(GEPI->getContext())));
     return;
   } 
     
   assert(NumElements == 2 && "Unhandled case!");
   // All users of the GEP must be loads.  At each use of the GEP, insert
   // two loads of the appropriate indexed GEP and select between them.
-  Value *IsOne = new ICmpInst(ICmpInst::ICMP_NE, I.getOperand(), 
-                              Context->getNullValue(I.getOperand()->getType()),
-                              "isone", GEPI);
+  Value *IsOne = new ICmpInst(GEPI, ICmpInst::ICMP_NE, I.getOperand(), 
+                              Constant::getNullValue(I.getOperand()->getType()),
+                              "isone");
   // Insert the new GEP instructions, which are properly indexed.
   SmallVector<Value*, 8> Indices(GEPI->op_begin()+1, GEPI->op_end());
-  Indices[1] = Context->getNullValue(Type::Int32Ty);
+  Indices[1] = Constant::getNullValue(Type::getInt32Ty(GEPI->getContext()));
   Value *ZeroIdx = GetElementPtrInst::Create(GEPI->getOperand(0),
                                              Indices.begin(),
                                              Indices.end(),
                                              GEPI->getName()+".0", GEPI);
-  Indices[1] = Context->getConstantInt(Type::Int32Ty, 1);
+  Indices[1] = ConstantInt::get(Type::getInt32Ty(GEPI->getContext()), 1);
   Value *OneIdx = GetElementPtrInst::Create(GEPI->getOperand(0),
                                             Indices.begin(),
                                             Indices.end(),
@@ -1261,9 +1282,9 @@ void SROA::CleanupAllocaUsers(AllocationInst *AI) {
 ///      and stores would mutate the memory.
 static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy,
                         unsigned AllocaSize, const TargetData &TD,
-                        LLVMContext* Context) {
+                        LLVMContext &Context) {
   // If this could be contributing to a vector, analyze it.
-  if (VecTy != Type::VoidTy) { // either null or a vector type.
+  if (VecTy != Type::getVoidTy(Context)) { // either null or a vector type.
 
     // If the In type is a vector that is the same size as the alloca, see if it
     // matches the existing VecTy.
@@ -1276,7 +1297,7 @@ static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy,
           VecTy = VInTy;
         return;
       }
-    } else if (In == Type::FloatTy || In == Type::DoubleTy ||
+    } else if (In->isFloatTy() || In->isDoubleTy() ||
                (isa<IntegerType>(In) && In->getPrimitiveSizeInBits() >= 8 &&
                 isPowerOf2_32(In->getPrimitiveSizeInBits()))) {
       // If we're accessing something that could be an element of a vector, see
@@ -1289,7 +1310,7 @@ static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy,
            cast<VectorType>(VecTy)->getElementType()
                  ->getPrimitiveSizeInBits()/8 == EltSize)) {
         if (VecTy == 0)
-          VecTy = Context->getVectorType(In, AllocaSize/EltSize);
+          VecTy = VectorType::get(In, AllocaSize/EltSize);
         return;
       }
     }
@@ -1297,7 +1318,7 @@ static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy,
   
   // Otherwise, we have a case that we can't handle with an optimized vector
   // form.  We can still turn this into a large integer.
-  VecTy = Type::VoidTy;
+  VecTy = Type::getVoidTy(Context);
 }
 
 /// CanConvertToScalar - V is a pointer.  If we can convert the pointee and all
@@ -1320,7 +1341,8 @@ bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy,
       // Don't break volatile loads.
       if (LI->isVolatile())
         return false;
-      MergeInType(LI->getType(), Offset, VecTy, AllocaSize, *TD, Context);
+      MergeInType(LI->getType(), Offset, VecTy,
+                  AllocaSize, *TD, V->getContext());
       SawVec |= isa<VectorType>(LI->getType());
       continue;
     }
@@ -1329,7 +1351,7 @@ bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy,
       // Storing the pointer, not into the value?
       if (SI->getOperand(0) == V || SI->isVolatile()) return 0;
       MergeInType(SI->getOperand(0)->getType(), Offset,
-                  VecTy, AllocaSize, *TD, Context);
+                  VecTy, AllocaSize, *TD, V->getContext());
       SawVec |= isa<VectorType>(SI->getOperand(0)->getType());
       continue;
     }
@@ -1433,7 +1455,8 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) {
     
     if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
       assert(SI->getOperand(0) != Ptr && "Consistency error!");
-      Value *Old = Builder.CreateLoad(NewAI, (NewAI->getName()+".in").c_str());
+      // FIXME: Remove once builder has Twine API.
+      Value *Old = Builder.CreateLoad(NewAI, (NewAI->getName()+".in").str().c_str());
       Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset,
                                              Builder);
       Builder.CreateStore(New, NewAI);
@@ -1457,8 +1480,10 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) {
           for (unsigned i = 1; i != NumBytes; ++i)
             APVal |= APVal << 8;
         
-        Value *Old = Builder.CreateLoad(NewAI, (NewAI->getName()+".in").c_str());
-        Value *New = ConvertScalar_InsertValue(Context->getConstantInt(APVal),
+        // FIXME: Remove once builder has Twine API.
+        Value *Old = Builder.CreateLoad(NewAI, (NewAI->getName()+".in").str().c_str());
+        Value *New = ConvertScalar_InsertValue(
+                                    ConstantInt::get(User->getContext(), APVal),
                                                Old, Offset, Builder);
         Builder.CreateStore(New, NewAI);
       }
@@ -1510,8 +1535,7 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) {
       continue;
     }
 
-    assert(0 && "Unsupported operation!");
-    abort();
+    llvm_unreachable("Unsupported operation!");
   }
 }
 
@@ -1545,9 +1569,8 @@ Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
       assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
     }
     // Return the element extracted out of it.
-    Value *V = Builder.CreateExtractElement(FromVal,
-                                    Context->getConstantInt(Type::Int32Ty,Elt),
-                                            "tmp");
+    Value *V = Builder.CreateExtractElement(FromVal, ConstantInt::get(
+                    Type::getInt32Ty(FromVal->getContext()), Elt), "tmp");
     if (V->getType() != ToType)
       V = Builder.CreateBitCast(V, ToType, "tmp");
     return V;
@@ -1557,7 +1580,7 @@ Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
   // use insertvalue's to form the FCA.
   if (const StructType *ST = dyn_cast<StructType>(ToType)) {
     const StructLayout &Layout = *TD->getStructLayout(ST);
-    Value *Res = Context->getUndef(ST);
+    Value *Res = UndefValue::get(ST);
     for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
       Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i),
                                         Offset+Layout.getElementOffsetInBits(i),
@@ -1569,7 +1592,7 @@ Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
   
   if (const ArrayType *AT = dyn_cast<ArrayType>(ToType)) {
     uint64_t EltSize = TD->getTypeAllocSizeInBits(AT->getElementType());
-    Value *Res = Context->getUndef(AT);
+    Value *Res = UndefValue::get(AT);
     for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
       Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(),
                                               Offset+i*EltSize, Builder);
@@ -1599,21 +1622,23 @@ Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
   // only some bits are used.
   if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth())
     FromVal = Builder.CreateLShr(FromVal,
-                                 Context->getConstantInt(FromVal->getType(),
+                                 ConstantInt::get(FromVal->getType(),
                                                            ShAmt), "tmp");
   else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth())
     FromVal = Builder.CreateShl(FromVal, 
-                                Context->getConstantInt(FromVal->getType(),
+                                ConstantInt::get(FromVal->getType(),
                                                           -ShAmt), "tmp");
 
   // Finally, unconditionally truncate the integer to the right width.
   unsigned LIBitWidth = TD->getTypeSizeInBits(ToType);
   if (LIBitWidth < NTy->getBitWidth())
     FromVal =
-      Builder.CreateTrunc(FromVal, Context->getIntegerType(LIBitWidth), "tmp");
+      Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(), 
+                                                    LIBitWidth), "tmp");
   else if (LIBitWidth > NTy->getBitWidth())
     FromVal =
-       Builder.CreateZExt(FromVal, Context->getIntegerType(LIBitWidth), "tmp");
+       Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(), 
+                                                    LIBitWidth), "tmp");
 
   // If the result is an integer, this is a trunc or bitcast.
   if (isa<IntegerType>(ToType)) {
@@ -1645,6 +1670,7 @@ Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old,
   // Convert the stored type to the actual type, shift it left to insert
   // then 'or' into place.
   const Type *AllocaType = Old->getType();
+  LLVMContext &Context = Old->getContext();
 
   if (const VectorType *VTy = dyn_cast<VectorType>(AllocaType)) {
     uint64_t VecSize = TD->getTypeAllocSizeInBits(VTy);
@@ -1664,7 +1690,7 @@ Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old,
       SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp");
     
     SV = Builder.CreateInsertElement(Old, SV, 
-                                   Context->getConstantInt(Type::Int32Ty, Elt),
+                     ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt),
                                      "tmp");
     return SV;
   }
@@ -1697,9 +1723,10 @@ Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old,
   unsigned SrcStoreWidth = TD->getTypeStoreSizeInBits(SV->getType());
   unsigned DestStoreWidth = TD->getTypeStoreSizeInBits(AllocaType);
   if (SV->getType()->isFloatingPoint() || isa<VectorType>(SV->getType()))
-    SV = Builder.CreateBitCast(SV, Context->getIntegerType(SrcWidth), "tmp");
+    SV = Builder.CreateBitCast(SV,
+                            IntegerType::get(SV->getContext(),SrcWidth), "tmp");
   else if (isa<PointerType>(SV->getType()))
-    SV = Builder.CreatePtrToInt(SV, TD->getIntPtrType(), "tmp");
+    SV = Builder.CreatePtrToInt(SV, TD->getIntPtrType(SV->getContext()), "tmp");
 
   // Zero extend or truncate the value if needed.
   if (SV->getType() != AllocaType) {
@@ -1732,11 +1759,11 @@ Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old,
   // only some bits in the structure are set.
   APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth));
   if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) {
-    SV = Builder.CreateShl(SV, Context->getConstantInt(SV->getType(),
+    SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(),
                            ShAmt), "tmp");
     Mask <<= ShAmt;
   } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) {
-    SV = Builder.CreateLShr(SV, Context->getConstantInt(SV->getType(),
+    SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(),
                             -ShAmt), "tmp");
     Mask = Mask.lshr(-ShAmt);
   }
@@ -1745,7 +1772,7 @@ Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old,
   // in the new bits.
   if (SrcWidth != DestWidth) {
     assert(DestWidth > SrcWidth);
-    Old = Builder.CreateAnd(Old, Context->getConstantInt(~Mask), "mask");
+    Old = Builder.CreateAnd(Old, ConstantInt::get(Context, ~Mask), "mask");
     SV = Builder.CreateOr(Old, SV, "ins");
   }
   return SV;
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index b8bce80..29712b3 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -30,7 +30,6 @@
 #include "llvm/Module.h"
 #include "llvm/Attributes.h"
 #include "llvm/Support/CFG.h"
-#include "llvm/Support/Compiler.h"
 #include "llvm/Pass.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -40,7 +39,7 @@ using namespace llvm;
 STATISTIC(NumSimpl, "Number of blocks simplified");
 
 namespace {
-  struct VISIBILITY_HIDDEN CFGSimplifyPass : public FunctionPass {
+  struct CFGSimplifyPass : public FunctionPass {
     static char ID; // Pass identification, replacement for typeid
     CFGSimplifyPass() : FunctionPass(&ID) {}
 
@@ -58,20 +57,20 @@ FunctionPass *llvm::createCFGSimplificationPass() {
 
 /// ChangeToUnreachable - Insert an unreachable instruction before the specified
 /// instruction, making it and the rest of the code in the block dead.
-static void ChangeToUnreachable(Instruction *I, LLVMContext* Context) {
+static void ChangeToUnreachable(Instruction *I, LLVMContext &Context) {
   BasicBlock *BB = I->getParent();
   // Loop over all of the successors, removing BB's entry from any PHI
   // nodes.
   for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
     (*SI)->removePredecessor(BB);
   
-  new UnreachableInst(I);
+  new UnreachableInst(I->getContext(), I);
   
   // All instructions after this are dead.
   BasicBlock::iterator BBI = I, BBE = BB->end();
   while (BBI != BBE) {
     if (!BBI->use_empty())
-      BBI->replaceAllUsesWith(Context->getUndef(BBI->getType()));
+      BBI->replaceAllUsesWith(UndefValue::get(BBI->getType()));
     BB->getInstList().erase(BBI++);
   }
 }
@@ -97,7 +96,7 @@ static void ChangeToCall(InvokeInst *II) {
 
 static bool MarkAliveBlocks(BasicBlock *BB,
                             SmallPtrSet<BasicBlock*, 128> &Reachable,
-                            LLVMContext* Context) {
+                            LLVMContext &Context) {
   
   SmallVector<BasicBlock*, 128> Worklist;
   Worklist.push_back(BB);
@@ -132,7 +131,7 @@ static bool MarkAliveBlocks(BasicBlock *BB,
         
         if (isa<UndefValue>(Ptr) ||
             (isa<ConstantPointerNull>(Ptr) &&
-             cast<PointerType>(Ptr->getType())->getAddressSpace() == 0)) {
+             SI->getPointerAddressSpace() == 0)) {
           ChangeToUnreachable(SI, Context);
           Changed = true;
           break;
diff --git a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
index 4aad17d..13077fe 100644
--- a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
@@ -22,15 +22,13 @@
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Config/config.h"
 using namespace llvm;
 
 namespace {
   /// This pass optimizes well half_powr function calls.
   ///
-  class VISIBILITY_HIDDEN SimplifyHalfPowrLibCalls : public FunctionPass {
+  class SimplifyHalfPowrLibCalls : public FunctionPass {
     const TargetData *TD;
   public:
     static char ID; // Pass identification
@@ -39,7 +37,6 @@ namespace {
     bool runOnFunction(Function &F);
 
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.addRequired<TargetData>();
     }
 
     Instruction *
@@ -60,8 +57,9 @@ FunctionPass *llvm::createSimplifyHalfPowrLibCallsPass() {
 /// InlineHalfPowrs - Inline a sequence of adjacent half_powr calls, rearranging
 /// their control flow to better facilitate subsequent optimization.
 Instruction *
-SimplifyHalfPowrLibCalls::InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs,
-                                        Instruction *InsertPt) {
+SimplifyHalfPowrLibCalls::
+InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs,
+                Instruction *InsertPt) {
   std::vector<BasicBlock *> Bodies;
   BasicBlock *NewBlock = 0;
 
@@ -123,7 +121,7 @@ SimplifyHalfPowrLibCalls::InlineHalfPowrs(const std::vector<Instruction *> &Half
 /// runOnFunction - Top level algorithm.
 ///
 bool SimplifyHalfPowrLibCalls::runOnFunction(Function &F) {
-  TD = &getAnalysis<TargetData>();
+  TD = getAnalysisIfAvailable<TargetData>();
   
   bool Changed = false;
   std::vector<Instruction *> HalfPowrs;
@@ -136,8 +134,7 @@ bool SimplifyHalfPowrLibCalls::runOnFunction(Function &F) {
         Function *Callee = CI->getCalledFunction();
         if (Callee && Callee->hasExternalLinkage()) {
           // Look for calls with well-known names.
-          const char *CalleeName = Callee->getNameStart();
-          if (strcmp(CalleeName, "__half_powrf4") == 0)
+          if (Callee->getName() == "__half_powrf4")
             IsHalfPowr = true;
         }
       }
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index ec48469..e1866015 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -9,11 +9,9 @@
 //
 // This file implements a simple pass that applies a variety of small
 // optimizations for calls to specific well-known function calls (e.g. runtime
-// library functions). For example, a call to the function "exit(3)" that
-// occurs within the main() function can be transformed into a simple "return 3"
-// instruction. Any optimization that takes this form (replace call to library
-// function with simpler code that provides the same result) belongs in this
-// file.
+// library functions).   Any optimization that takes the very simple form
+// "replace call to library function with simpler code that provides the same
+// result" belongs in this file.
 //
 //===----------------------------------------------------------------------===//
 
@@ -29,8 +27,9 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Config/config.h"
 using namespace llvm;
 
@@ -44,7 +43,7 @@ STATISTIC(NumAnnotated, "Number of attributes added to library functions");
 /// This class is the abstract base class for the set of optimizations that
 /// corresponds to one library call.
 namespace {
-class VISIBILITY_HIDDEN LibCallOptimization {
+class LibCallOptimization {
 protected:
   Function *Caller;
   const TargetData *TD;
@@ -58,14 +57,14 @@ public:
   /// performed.  If it returns CI, then it transformed the call and CI is to be
   /// deleted.  If it returns something else, replace CI with the new value and
   /// delete CI.
-  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) 
+  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B)
     =0;
-  
-  Value *OptimizeCall(CallInst *CI, const TargetData &TD, IRBuilder<> &B) {
+
+  Value *OptimizeCall(CallInst *CI, const TargetData *TD, IRBuilder<> &B) {
     Caller = CI->getParent()->getParent();
-    this->TD = &TD;
+    this->TD = TD;
     if (CI->getCalledFunction())
-      Context = CI->getCalledFunction()->getContext();
+      Context = &CI->getCalledFunction()->getContext();
     return CallOptimizer(CI->getCalledFunction(), CI, B);
   }
 
@@ -76,12 +75,12 @@ public:
   /// specified pointer.  Ptr is required to be some pointer type, and the
   /// return value has 'intptr_t' type.
   Value *EmitStrLen(Value *Ptr, IRBuilder<> &B);
-  
+
   /// EmitMemCpy - Emit a call to the memcpy function to the builder.  This
   /// always expects that the size has type 'intptr_t' and Dst/Src are pointers.
-  Value *EmitMemCpy(Value *Dst, Value *Src, Value *Len, 
+  Value *EmitMemCpy(Value *Dst, Value *Src, Value *Len,
                     unsigned Align, IRBuilder<> &B);
-  
+
   /// EmitMemChr - Emit a call to the memchr function.  This assumes that Ptr is
   /// a pointer, Val is an i32 value, and Len is an 'intptr_t' value.
   Value *EmitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B);
@@ -96,35 +95,36 @@ public:
   /// 'floor').  This function is known to take a single of type matching 'Op'
   /// and returns one value with the same type.  If 'Op' is a long double, 'l'
   /// is added as the suffix of name, if 'Op' is a float, we add a 'f' suffix.
-  Value *EmitUnaryFloatFnCall(Value *Op, const char *Name, IRBuilder<> &B);
-  
+  Value *EmitUnaryFloatFnCall(Value *Op, const char *Name, IRBuilder<> &B,
+                              const AttrListPtr &Attrs);
+
   /// EmitPutChar - Emit a call to the putchar function.  This assumes that Char
   /// is an integer.
   void EmitPutChar(Value *Char, IRBuilder<> &B);
-  
+
   /// EmitPutS - Emit a call to the puts function.  This assumes that Str is
   /// some pointer.
   void EmitPutS(Value *Str, IRBuilder<> &B);
-    
+
   /// EmitFPutC - Emit a call to the fputc function.  This assumes that Char is
   /// an i32, and File is a pointer to FILE.
   void EmitFPutC(Value *Char, Value *File, IRBuilder<> &B);
-  
+
   /// EmitFPutS - Emit a call to the puts function.  Str is required to be a
   /// pointer and File is a pointer to FILE.
   void EmitFPutS(Value *Str, Value *File, IRBuilder<> &B);
-  
+
   /// EmitFWrite - Emit a call to the fwrite function.  This assumes that Ptr is
   /// a pointer, Size is an 'intptr_t', and File is a pointer to FILE.
   void EmitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B);
-  
+
 };
 } // End anonymous namespace.
 
 /// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*.
 Value *LibCallOptimization::CastToCStr(Value *V, IRBuilder<> &B) {
   return
-        B.CreateBitCast(V, Context->getPointerTypeUnqual(Type::Int8Ty), "cstr");
+        B.CreateBitCast(V, Type::getInt8PtrTy(*Context), "cstr");
 }
 
 /// EmitStrLen - Emit a call to the strlen function to the builder, for the
@@ -137,8 +137,8 @@ Value *LibCallOptimization::EmitStrLen(Value *Ptr, IRBuilder<> &B) {
                                    Attribute::NoUnwind);
 
   Constant *StrLen =M->getOrInsertFunction("strlen", AttrListPtr::get(AWI, 2),
-                                           TD->getIntPtrType(),
-                                    Context->getPointerTypeUnqual(Type::Int8Ty),
+                                           TD->getIntPtrType(*Context),
+					   Type::getInt8PtrTy(*Context),
                                            NULL);
   CallInst *CI = B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen");
   if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts()))
@@ -157,7 +157,7 @@ Value *LibCallOptimization::EmitMemCpy(Value *Dst, Value *Src, Value *Len,
   Tys[0] = Len->getType();
   Value *MemCpy = Intrinsic::getDeclaration(M, IID, Tys, 1);
   return B.CreateCall4(MemCpy, CastToCStr(Dst, B), CastToCStr(Src, B), Len,
-                       Context->getConstantInt(Type::Int32Ty, Align));
+                       ConstantInt::get(Type::getInt32Ty(*Context), Align));
 }
 
 /// EmitMemChr - Emit a call to the memchr function.  This assumes that Ptr is
@@ -169,9 +169,10 @@ Value *LibCallOptimization::EmitMemChr(Value *Ptr, Value *Val,
   AWI = AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind);
 
   Value *MemChr = M->getOrInsertFunction("memchr", AttrListPtr::get(&AWI, 1),
-                                    Context->getPointerTypeUnqual(Type::Int8Ty),
-                                    Context->getPointerTypeUnqual(Type::Int8Ty),
-                                         Type::Int32Ty, TD->getIntPtrType(),
+					 Type::getInt8PtrTy(*Context),
+					 Type::getInt8PtrTy(*Context),
+                                         Type::getInt32Ty(*Context),
+					 TD->getIntPtrType(*Context),
                                          NULL);
   CallInst *CI = B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr");
 
@@ -192,10 +193,10 @@ Value *LibCallOptimization::EmitMemCmp(Value *Ptr1, Value *Ptr2,
                                    Attribute::NoUnwind);
 
   Value *MemCmp = M->getOrInsertFunction("memcmp", AttrListPtr::get(AWI, 3),
-                                         Type::Int32Ty,
-                                    Context->getPointerTypeUnqual(Type::Int8Ty),
-                                    Context->getPointerTypeUnqual(Type::Int8Ty),
-                                         TD->getIntPtrType(), NULL);
+                                         Type::getInt32Ty(*Context),
+                                    Type::getInt8PtrTy(*Context),
+                                    Type::getInt8PtrTy(*Context),
+                                         TD->getIntPtrType(*Context), NULL);
   CallInst *CI = B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B),
                                Len, "memcmp");
 
@@ -213,7 +214,7 @@ Value *LibCallOptimization::EmitMemSet(Value *Dst, Value *Val,
  const Type *Tys[1];
  Tys[0] = Len->getType();
  Value *MemSet = Intrinsic::getDeclaration(M, IID, Tys, 1);
- Value *Align = Context->getConstantInt(Type::Int32Ty, 1);
+ Value *Align = ConstantInt::get(Type::getInt32Ty(*Context), 1);
  return B.CreateCall4(MemSet, CastToCStr(Dst, B), Val, Len, Align);
 }
 
@@ -222,14 +223,15 @@ Value *LibCallOptimization::EmitMemSet(Value *Dst, Value *Val,
 /// returns one value with the same type.  If 'Op' is a long double, 'l' is
 /// added as the suffix of name, if 'Op' is a float, we add a 'f' suffix.
 Value *LibCallOptimization::EmitUnaryFloatFnCall(Value *Op, const char *Name,
-                                                 IRBuilder<> &B) {
+                                                 IRBuilder<> &B,
+                                                 const AttrListPtr &Attrs) {
   char NameBuffer[20];
-  if (Op->getType() != Type::DoubleTy) {
+  if (!Op->getType()->isDoubleTy()) {
     // If we need to add a suffix, copy into NameBuffer.
     unsigned NameLen = strlen(Name);
     assert(NameLen < sizeof(NameBuffer)-2);
     memcpy(NameBuffer, Name, NameLen);
-    if (Op->getType() == Type::FloatTy)
+    if (Op->getType()->isFloatTy())
       NameBuffer[NameLen] = 'f';  // floorf
     else
       NameBuffer[NameLen] = 'l';  // floorl
@@ -241,7 +243,7 @@ Value *LibCallOptimization::EmitUnaryFloatFnCall(Value *Op, const char *Name,
   Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
                                          Op->getType(), NULL);
   CallInst *CI = B.CreateCall(Callee, Op, Name);
-
+  CI->setAttributes(Attrs);
   if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
     CI->setCallingConv(F->getCallingConv());
 
@@ -252,10 +254,12 @@ Value *LibCallOptimization::EmitUnaryFloatFnCall(Value *Op, const char *Name,
 /// is an integer.
 void LibCallOptimization::EmitPutChar(Value *Char, IRBuilder<> &B) {
   Module *M = Caller->getParent();
-  Value *PutChar = M->getOrInsertFunction("putchar", Type::Int32Ty,
-                                          Type::Int32Ty, NULL);
+  Value *PutChar = M->getOrInsertFunction("putchar", Type::getInt32Ty(*Context),
+                                          Type::getInt32Ty(*Context), NULL);
   CallInst *CI = B.CreateCall(PutChar,
-                              B.CreateIntCast(Char, Type::Int32Ty, "chari"),
+                              B.CreateIntCast(Char,
+					      Type::getInt32Ty(*Context),
+					      "chari"),
                               "putchar");
 
   if (const Function *F = dyn_cast<Function>(PutChar->stripPointerCasts()))
@@ -271,8 +275,8 @@ void LibCallOptimization::EmitPutS(Value *Str, IRBuilder<> &B) {
   AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
 
   Value *PutS = M->getOrInsertFunction("puts", AttrListPtr::get(AWI, 2),
-                                       Type::Int32Ty,
-                                    Context->getPointerTypeUnqual(Type::Int8Ty),
+                                       Type::getInt32Ty(*Context),
+                                    Type::getInt8PtrTy(*Context),
                                        NULL);
   CallInst *CI = B.CreateCall(PutS, CastToCStr(Str, B), "puts");
   if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts()))
@@ -289,12 +293,16 @@ void LibCallOptimization::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B) {
   AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
   Constant *F;
   if (isa<PointerType>(File->getType()))
-    F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI, 2), Type::Int32Ty,
-                               Type::Int32Ty, File->getType(), NULL);
+    F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI, 2),
+			       Type::getInt32Ty(*Context),
+                               Type::getInt32Ty(*Context), File->getType(),
+			       NULL);
   else
-    F = M->getOrInsertFunction("fputc", Type::Int32Ty, Type::Int32Ty,
+    F = M->getOrInsertFunction("fputc",
+			       Type::getInt32Ty(*Context),
+			       Type::getInt32Ty(*Context),
                                File->getType(), NULL);
-  Char = B.CreateIntCast(Char, Type::Int32Ty, "chari");
+  Char = B.CreateIntCast(Char, Type::getInt32Ty(*Context), "chari");
   CallInst *CI = B.CreateCall2(F, Char, File, "fputc");
 
   if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
@@ -311,12 +319,13 @@ void LibCallOptimization::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B) {
   AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
   Constant *F;
   if (isa<PointerType>(File->getType()))
-    F = M->getOrInsertFunction("fputs", AttrListPtr::get(AWI, 3), Type::Int32Ty,
-                               Context->getPointerTypeUnqual(Type::Int8Ty),
+    F = M->getOrInsertFunction("fputs", AttrListPtr::get(AWI, 3),
+			       Type::getInt32Ty(*Context),
+                               Type::getInt8PtrTy(*Context),
                                File->getType(), NULL);
   else
-    F = M->getOrInsertFunction("fputs", Type::Int32Ty,
-                               Context->getPointerTypeUnqual(Type::Int8Ty),
+    F = M->getOrInsertFunction("fputs", Type::getInt32Ty(*Context),
+                               Type::getInt8PtrTy(*Context),
                                File->getType(), NULL);
   CallInst *CI = B.CreateCall2(F, CastToCStr(Str, B), File, "fputs");
 
@@ -336,17 +345,19 @@ void LibCallOptimization::EmitFWrite(Value *Ptr, Value *Size, Value *File,
   Constant *F;
   if (isa<PointerType>(File->getType()))
     F = M->getOrInsertFunction("fwrite", AttrListPtr::get(AWI, 3),
-                               TD->getIntPtrType(),
-                               Context->getPointerTypeUnqual(Type::Int8Ty),
-                               TD->getIntPtrType(), TD->getIntPtrType(),
+                               TD->getIntPtrType(*Context),
+                               Type::getInt8PtrTy(*Context),
+                               TD->getIntPtrType(*Context),
+			       TD->getIntPtrType(*Context),
                                File->getType(), NULL);
   else
-    F = M->getOrInsertFunction("fwrite", TD->getIntPtrType(),
-                               Context->getPointerTypeUnqual(Type::Int8Ty),
-                               TD->getIntPtrType(), TD->getIntPtrType(),
+    F = M->getOrInsertFunction("fwrite", TD->getIntPtrType(*Context),
+                               Type::getInt8PtrTy(*Context),
+                               TD->getIntPtrType(*Context),
+			       TD->getIntPtrType(*Context),
                                File->getType(), NULL);
   CallInst *CI = B.CreateCall4(F, CastToCStr(Ptr, B), Size,
-                        Context->getConstantInt(TD->getIntPtrType(), 1), File);
+                        ConstantInt::get(TD->getIntPtrType(*Context), 1), File);
 
   if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
     CI->setCallingConv(Fn->getCallingConv());
@@ -362,30 +373,30 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {
   // Look through noop bitcast instructions.
   if (BitCastInst *BCI = dyn_cast<BitCastInst>(V))
     return GetStringLengthH(BCI->getOperand(0), PHIs);
-  
+
   // If this is a PHI node, there are two cases: either we have already seen it
   // or we haven't.
   if (PHINode *PN = dyn_cast<PHINode>(V)) {
     if (!PHIs.insert(PN))
       return ~0ULL;  // already in the set.
-    
+
     // If it was new, see if all the input strings are the same length.
     uint64_t LenSoFar = ~0ULL;
     for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
       uint64_t Len = GetStringLengthH(PN->getIncomingValue(i), PHIs);
       if (Len == 0) return 0; // Unknown length -> unknown.
-      
+
       if (Len == ~0ULL) continue;
-      
+
       if (Len != LenSoFar && LenSoFar != ~0ULL)
         return 0;    // Disagree -> unknown.
       LenSoFar = Len;
     }
-    
+
     // Success, all agree.
     return LenSoFar;
   }
-  
+
   // strlen(select(c,x,y)) -> strlen(x) ^ strlen(y)
   if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
     uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs);
@@ -397,7 +408,7 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {
     if (Len1 != Len2) return 0;
     return Len1;
   }
-  
+
   // If the value is not a GEP instruction nor a constant expression with a
   // GEP instruction, then return unknown.
   User *GEP = 0;
@@ -410,11 +421,11 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {
   } else {
     return 0;
   }
-  
+
   // Make sure the GEP has exactly three arguments.
   if (GEP->getNumOperands() != 3)
     return 0;
-  
+
   // Check to make sure that the first operand of the GEP is an integer and
   // has value 0 so that we are sure we're indexing into the initializer.
   if (ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(1))) {
@@ -422,7 +433,7 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {
       return 0;
   } else
     return 0;
-  
+
   // If the second index isn't a ConstantInt, then this is a variable index
   // into the array.  If this occurs, we can't say anything meaningful about
   // the string.
@@ -431,28 +442,30 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {
     StartIdx = CI->getZExtValue();
   else
     return 0;
-  
+
   // The GEP instruction, constant or instruction, must reference a global
   // variable that is a constant and is initialized. The referenced constant
   // initializer is the array that we'll use for optimization.
   GlobalVariable* GV = dyn_cast<GlobalVariable>(GEP->getOperand(0));
-  if (!GV || !GV->isConstant() || !GV->hasInitializer())
+  if (!GV || !GV->isConstant() || !GV->hasInitializer() ||
+      GV->mayBeOverridden())
     return 0;
   Constant *GlobalInit = GV->getInitializer();
-  
+
   // Handle the ConstantAggregateZero case, which is a degenerate case. The
   // initializer is constant zero so the length of the string must be zero.
   if (isa<ConstantAggregateZero>(GlobalInit))
     return 1;  // Len = 0 offset by 1.
-  
+
   // Must be a Constant Array
   ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit);
-  if (!Array || Array->getType()->getElementType() != Type::Int8Ty)
+  if (!Array ||
+      Array->getType()->getElementType() != Type::getInt8Ty(V->getContext()))
     return false;
-  
+
   // Get the number of elements in the array
   uint64_t NumElts = Array->getType()->getNumElements();
-  
+
   // Traverse the constant array from StartIdx (derived above) which is
   // the place the GEP refers to in the array.
   for (unsigned i = StartIdx; i != NumElts; ++i) {
@@ -463,7 +476,7 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {
     if (CI->isZero())
       return i-StartIdx+1; // We found end of string, success!
   }
-  
+
   return 0; // The array isn't null terminated, conservatively return 'unknown'.
 }
 
@@ -471,7 +484,7 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {
 /// the specified pointer, return 'len+1'.  If we can't, return 0.
 static uint64_t GetStringLength(Value *V) {
   if (!isa<PointerType>(V->getType())) return 0;
-  
+
   SmallPtrSet<PHINode*, 32> PHIs;
   uint64_t Len = GetStringLengthH(V, PHIs);
   // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return
@@ -480,7 +493,7 @@ static uint64_t GetStringLength(Value *V) {
 }
 
 /// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
-/// value is equal or not-equal to zero. 
+/// value is equal or not-equal to zero.
 static bool IsOnlyUsedInZeroEqualityComparison(Value *V) {
   for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
        UI != E; ++UI) {
@@ -496,73 +509,38 @@ static bool IsOnlyUsedInZeroEqualityComparison(Value *V) {
 }
 
 //===----------------------------------------------------------------------===//
-// Miscellaneous LibCall Optimizations
-//===----------------------------------------------------------------------===//
-
-namespace {
-//===---------------------------------------===//
-// 'exit' Optimizations
-
-/// ExitOpt - int main() { exit(4); } --> int main() { return 4; }
-struct VISIBILITY_HIDDEN ExitOpt : public LibCallOptimization {
-  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
-    // Verify we have a reasonable prototype for exit.
-    if (Callee->arg_size() == 0 || !CI->use_empty())
-      return 0;
-
-    // Verify the caller is main, and that the result type of main matches the
-    // argument type of exit.
-    if (!Caller->isName("main") || !Caller->hasExternalLinkage() ||
-        Caller->getReturnType() != CI->getOperand(1)->getType())
-      return 0;
-
-    TerminatorInst *OldTI = CI->getParent()->getTerminator();
-    
-    // Create the return after the call.
-    ReturnInst *RI = B.CreateRet(CI->getOperand(1));
-
-    // Drop all successor phi node entries.
-    for (unsigned i = 0, e = OldTI->getNumSuccessors(); i != e; ++i)
-      OldTI->getSuccessor(i)->removePredecessor(CI->getParent());
-    
-    // Erase all instructions from after our return instruction until the end of
-    // the block.
-    BasicBlock::iterator FirstDead = RI; ++FirstDead;
-    CI->getParent()->getInstList().erase(FirstDead, CI->getParent()->end());
-    return CI;
-  }
-};
-
-//===----------------------------------------------------------------------===//
 // String and Memory LibCall Optimizations
 //===----------------------------------------------------------------------===//
 
 //===---------------------------------------===//
 // 'strcat' Optimizations
-
-struct VISIBILITY_HIDDEN StrCatOpt : public LibCallOptimization {
+namespace {
+struct StrCatOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
     // Verify the "strcat" function prototype.
     const FunctionType *FT = Callee->getFunctionType();
     if (FT->getNumParams() != 2 ||
-        FT->getReturnType() != Context->getPointerTypeUnqual(Type::Int8Ty) ||
+        FT->getReturnType() != Type::getInt8PtrTy(*Context) ||
         FT->getParamType(0) != FT->getReturnType() ||
         FT->getParamType(1) != FT->getReturnType())
       return 0;
-    
+
     // Extract some information from the instruction
     Value *Dst = CI->getOperand(1);
     Value *Src = CI->getOperand(2);
-    
+
     // See if we can get the length of the input string.
     uint64_t Len = GetStringLength(Src);
     if (Len == 0) return 0;
     --Len;  // Unbias length.
-    
+
     // Handle the simple, do-nothing case: strcat(x, "") -> x
     if (Len == 0)
       return Dst;
-    
+
+    // These optimizations require TargetData.
+    if (!TD) return 0;
+
     EmitStrLenMemCpy(Src, Dst, Len, B);
     return Dst;
   }
@@ -571,28 +549,28 @@ struct VISIBILITY_HIDDEN StrCatOpt : public LibCallOptimization {
     // We need to find the end of the destination string.  That's where the
     // memory is to be moved to. We just generate a call to strlen.
     Value *DstLen = EmitStrLen(Dst, B);
-    
+
     // Now that we have the destination's length, we must index into the
     // destination's pointer to get the actual memcpy destination (end of
     // the string .. we're concatenating).
     Value *CpyDst = B.CreateGEP(Dst, DstLen, "endptr");
-    
+
     // We have enough information to now generate the memcpy call to do the
     // concatenation for us.  Make a memcpy to copy the nul byte with align = 1.
     EmitMemCpy(CpyDst, Src,
-               Context->getConstantInt(TD->getIntPtrType(), Len+1), 1, B);
+               ConstantInt::get(TD->getIntPtrType(*Context), Len+1), 1, B);
   }
 };
 
 //===---------------------------------------===//
 // 'strncat' Optimizations
 
-struct VISIBILITY_HIDDEN StrNCatOpt : public StrCatOpt {
+struct StrNCatOpt : public StrCatOpt {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
     // Verify the "strncat" function prototype.
     const FunctionType *FT = Callee->getFunctionType();
     if (FT->getNumParams() != 3 ||
-        FT->getReturnType() != Context->getPointerTypeUnqual(Type::Int8Ty) ||
+        FT->getReturnType() != Type::getInt8PtrTy(*Context) ||
         FT->getParamType(0) != FT->getReturnType() ||
         FT->getParamType(1) != FT->getReturnType() ||
         !isa<IntegerType>(FT->getParamType(2)))
@@ -619,6 +597,9 @@ struct VISIBILITY_HIDDEN StrNCatOpt : public StrCatOpt {
     // strncat(x,  c, 0) -> x
     if (SrcLen == 0 || Len == 0) return Dst;
 
+    // These optimizations require TargetData.
+    if (!TD) return 0;
+
     // We don't optimize this case
     if (Len < SrcLen) return 0;
 
@@ -632,27 +613,31 @@ struct VISIBILITY_HIDDEN StrNCatOpt : public StrCatOpt {
 //===---------------------------------------===//
 // 'strchr' Optimizations
 
-struct VISIBILITY_HIDDEN StrChrOpt : public LibCallOptimization {
+struct StrChrOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
     // Verify the "strchr" function prototype.
     const FunctionType *FT = Callee->getFunctionType();
     if (FT->getNumParams() != 2 ||
-        FT->getReturnType() != Context->getPointerTypeUnqual(Type::Int8Ty) ||
+        FT->getReturnType() != Type::getInt8PtrTy(*Context) ||
         FT->getParamType(0) != FT->getReturnType())
       return 0;
-    
+
     Value *SrcStr = CI->getOperand(1);
-    
+
     // If the second operand is non-constant, see if we can compute the length
     // of the input string and turn this into memchr.
     ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getOperand(2));
     if (CharC == 0) {
+      // These optimizations require TargetData.
+      if (!TD) return 0;
+
       uint64_t Len = GetStringLength(SrcStr);
-      if (Len == 0 || FT->getParamType(1) != Type::Int32Ty) // memchr needs i32.
+      if (Len == 0 ||
+          FT->getParamType(1) != Type::getInt32Ty(*Context)) // memchr needs i32.
         return 0;
-      
+
       return EmitMemChr(SrcStr, CI->getOperand(2), // include nul.
-                        Context->getConstantInt(TD->getIntPtrType(), Len), B);
+                        ConstantInt::get(TD->getIntPtrType(*Context), Len), B);
     }
 
     // Otherwise, the character is a constant, see if the first argument is
@@ -660,24 +645,24 @@ struct VISIBILITY_HIDDEN StrChrOpt : public LibCallOptimization {
     std::string Str;
     if (!GetConstantStringInfo(SrcStr, Str))
       return 0;
-    
+
     // strchr can find the nul character.
     Str += '\0';
     char CharValue = CharC->getSExtValue();
-    
+
     // Compute the offset.
     uint64_t i = 0;
     while (1) {
       if (i == Str.size())    // Didn't find the char.  strchr returns null.
-        return Context->getNullValue(CI->getType());
+        return Constant::getNullValue(CI->getType());
       // Did we find our match?
       if (Str[i] == CharValue)
         break;
       ++i;
     }
-    
+
     // strchr(s+n,c)  -> gep(s+n+i,c)
-    Value *Idx = Context->getConstantInt(Type::Int64Ty, i);
+    Value *Idx = ConstantInt::get(Type::getInt64Ty(*Context), i);
     return B.CreateGEP(SrcStr, Idx, "strchr");
   }
 };
@@ -685,40 +670,44 @@ struct VISIBILITY_HIDDEN StrChrOpt : public LibCallOptimization {
 //===---------------------------------------===//
 // 'strcmp' Optimizations
 
-struct VISIBILITY_HIDDEN StrCmpOpt : public LibCallOptimization {
+struct StrCmpOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
     // Verify the "strcmp" function prototype.
     const FunctionType *FT = Callee->getFunctionType();
-    if (FT->getNumParams() != 2 || FT->getReturnType() != Type::Int32Ty ||
+    if (FT->getNumParams() != 2 ||
+	FT->getReturnType() != Type::getInt32Ty(*Context) ||
         FT->getParamType(0) != FT->getParamType(1) ||
-        FT->getParamType(0) != Context->getPointerTypeUnqual(Type::Int8Ty))
+        FT->getParamType(0) != Type::getInt8PtrTy(*Context))
       return 0;
-    
+
     Value *Str1P = CI->getOperand(1), *Str2P = CI->getOperand(2);
     if (Str1P == Str2P)      // strcmp(x,x)  -> 0
-      return Context->getConstantInt(CI->getType(), 0);
-    
+      return ConstantInt::get(CI->getType(), 0);
+
     std::string Str1, Str2;
     bool HasStr1 = GetConstantStringInfo(Str1P, Str1);
     bool HasStr2 = GetConstantStringInfo(Str2P, Str2);
-    
+
     if (HasStr1 && Str1.empty()) // strcmp("", x) -> *x
       return B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType());
-    
+
     if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x
       return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
-    
+
     // strcmp(x, y)  -> cnst  (if both x and y are constant strings)
     if (HasStr1 && HasStr2)
-      return Context->getConstantInt(CI->getType(), 
+      return ConstantInt::get(CI->getType(),
                                      strcmp(Str1.c_str(),Str2.c_str()));
 
     // strcmp(P, "x") -> memcmp(P, "x", 2)
     uint64_t Len1 = GetStringLength(Str1P);
     uint64_t Len2 = GetStringLength(Str2P);
     if (Len1 && Len2) {
+      // These optimizations require TargetData.
+      if (!TD) return 0;
+
       return EmitMemCmp(Str1P, Str2P,
-                        Context->getConstantInt(TD->getIntPtrType(),
+                        ConstantInt::get(TD->getIntPtrType(*Context),
                         std::min(Len1, Len2)), B);
     }
 
@@ -729,43 +718,44 @@ struct VISIBILITY_HIDDEN StrCmpOpt : public LibCallOptimization {
 //===---------------------------------------===//
 // 'strncmp' Optimizations
 
-struct VISIBILITY_HIDDEN StrNCmpOpt : public LibCallOptimization {
+struct StrNCmpOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
     // Verify the "strncmp" function prototype.
     const FunctionType *FT = Callee->getFunctionType();
-    if (FT->getNumParams() != 3 || FT->getReturnType() != Type::Int32Ty ||
+    if (FT->getNumParams() != 3 ||
+	FT->getReturnType() != Type::getInt32Ty(*Context) ||
         FT->getParamType(0) != FT->getParamType(1) ||
-        FT->getParamType(0) != Context->getPointerTypeUnqual(Type::Int8Ty) ||
+        FT->getParamType(0) != Type::getInt8PtrTy(*Context) ||
         !isa<IntegerType>(FT->getParamType(2)))
       return 0;
-    
+
     Value *Str1P = CI->getOperand(1), *Str2P = CI->getOperand(2);
     if (Str1P == Str2P)      // strncmp(x,x,n)  -> 0
-      return Context->getConstantInt(CI->getType(), 0);
-    
+      return ConstantInt::get(CI->getType(), 0);
+
     // Get the length argument if it is constant.
     uint64_t Length;
     if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getOperand(3)))
       Length = LengthArg->getZExtValue();
     else
       return 0;
-    
+
     if (Length == 0) // strncmp(x,y,0)   -> 0
-      return Context->getConstantInt(CI->getType(), 0);
-    
+      return ConstantInt::get(CI->getType(), 0);
+
     std::string Str1, Str2;
     bool HasStr1 = GetConstantStringInfo(Str1P, Str1);
     bool HasStr2 = GetConstantStringInfo(Str2P, Str2);
-    
+
     if (HasStr1 && Str1.empty())  // strncmp("", x, n) -> *x
       return B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType());
-    
+
     if (HasStr2 && Str2.empty())  // strncmp(x, "", n) -> *x
       return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
-    
+
     // strncmp(x, y)  -> cnst  (if both x and y are constant strings)
     if (HasStr1 && HasStr2)
-      return Context->getConstantInt(CI->getType(),
+      return ConstantInt::get(CI->getType(),
                               strncmp(Str1.c_str(), Str2.c_str(), Length));
     return 0;
   }
@@ -775,27 +765,30 @@ struct VISIBILITY_HIDDEN StrNCmpOpt : public LibCallOptimization {
 //===---------------------------------------===//
 // 'strcpy' Optimizations
 
-struct VISIBILITY_HIDDEN StrCpyOpt : public LibCallOptimization {
+struct StrCpyOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
     // Verify the "strcpy" function prototype.
     const FunctionType *FT = Callee->getFunctionType();
     if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
         FT->getParamType(0) != FT->getParamType(1) ||
-        FT->getParamType(0) != Context->getPointerTypeUnqual(Type::Int8Ty))
+        FT->getParamType(0) != Type::getInt8PtrTy(*Context))
       return 0;
-    
+
     Value *Dst = CI->getOperand(1), *Src = CI->getOperand(2);
     if (Dst == Src)      // strcpy(x,x)  -> x
       return Src;
-    
+
+    // These optimizations require TargetData.
+    if (!TD) return 0;
+
     // See if we can get the length of the input string.
     uint64_t Len = GetStringLength(Src);
     if (Len == 0) return 0;
-    
+
     // We have enough information to now generate the memcpy call to do the
     // concatenation for us.  Make a memcpy to copy the nul byte with align = 1.
     EmitMemCpy(Dst, Src,
-               Context->getConstantInt(TD->getIntPtrType(), Len), 1, B);
+               ConstantInt::get(TD->getIntPtrType(*Context), Len), 1, B);
     return Dst;
   }
 };
@@ -803,12 +796,12 @@ struct VISIBILITY_HIDDEN StrCpyOpt : public LibCallOptimization {
 //===---------------------------------------===//
 // 'strncpy' Optimizations
 
-struct VISIBILITY_HIDDEN StrNCpyOpt : public LibCallOptimization {
+struct StrNCpyOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
     const FunctionType *FT = Callee->getFunctionType();
     if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
         FT->getParamType(0) != FT->getParamType(1) ||
-        FT->getParamType(0) != Context->getPointerTypeUnqual(Type::Int8Ty) ||
+        FT->getParamType(0) != Type::getInt8PtrTy(*Context) ||
         !isa<IntegerType>(FT->getParamType(2)))
       return 0;
 
@@ -823,7 +816,8 @@ struct VISIBILITY_HIDDEN StrNCpyOpt : public LibCallOptimization {
 
     if (SrcLen == 0) {
       // strncpy(x, "", y) -> memset(x, '\0', y, 1)
-      EmitMemSet(Dst, Context->getConstantInt(Type::Int8Ty, '\0'), LenOp, B);
+      EmitMemSet(Dst, ConstantInt::get(Type::getInt8Ty(*Context), '\0'), LenOp,
+		 B);
       return Dst;
     }
 
@@ -835,12 +829,15 @@ struct VISIBILITY_HIDDEN StrNCpyOpt : public LibCallOptimization {
 
     if (Len == 0) return Dst; // strncpy(x, y, 0) -> x
 
+    // These optimizations require TargetData.
+    if (!TD) return 0;
+
     // Let strncpy handle the zero padding
     if (Len > SrcLen+1) return 0;
 
     // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant]
     EmitMemCpy(Dst, Src,
-               Context->getConstantInt(TD->getIntPtrType(), Len), 1, B);
+               ConstantInt::get(TD->getIntPtrType(*Context), Len), 1, B);
 
     return Dst;
   }
@@ -849,19 +846,19 @@ struct VISIBILITY_HIDDEN StrNCpyOpt : public LibCallOptimization {
 //===---------------------------------------===//
 // 'strlen' Optimizations
 
-struct VISIBILITY_HIDDEN StrLenOpt : public LibCallOptimization {
+struct StrLenOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
     const FunctionType *FT = Callee->getFunctionType();
     if (FT->getNumParams() != 1 ||
-        FT->getParamType(0) != Context->getPointerTypeUnqual(Type::Int8Ty) ||
+        FT->getParamType(0) != Type::getInt8PtrTy(*Context) ||
         !isa<IntegerType>(FT->getReturnType()))
       return 0;
-    
+
     Value *Src = CI->getOperand(1);
 
     // Constant folding: strlen("xyz") -> 3
     if (uint64_t Len = GetStringLength(Src))
-      return Context->getConstantInt(CI->getType(), Len-1);
+      return ConstantInt::get(CI->getType(), Len-1);
 
     // Handle strlen(p) != 0.
     if (!IsOnlyUsedInZeroEqualityComparison(CI)) return 0;
@@ -875,7 +872,7 @@ struct VISIBILITY_HIDDEN StrLenOpt : public LibCallOptimization {
 //===---------------------------------------===//
 // 'strto*' Optimizations
 
-struct VISIBILITY_HIDDEN StrToOpt : public LibCallOptimization {
+struct StrToOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
     const FunctionType *FT = Callee->getFunctionType();
     if ((FT->getNumParams() != 2 && FT->getNumParams() != 3) ||
@@ -897,18 +894,18 @@ struct VISIBILITY_HIDDEN StrToOpt : public LibCallOptimization {
 //===---------------------------------------===//
 // 'memcmp' Optimizations
 
-struct VISIBILITY_HIDDEN MemCmpOpt : public LibCallOptimization {
+struct MemCmpOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
     const FunctionType *FT = Callee->getFunctionType();
     if (FT->getNumParams() != 3 || !isa<PointerType>(FT->getParamType(0)) ||
         !isa<PointerType>(FT->getParamType(1)) ||
-        FT->getReturnType() != Type::Int32Ty)
+        FT->getReturnType() != Type::getInt32Ty(*Context))
       return 0;
 
     Value *LHS = CI->getOperand(1), *RHS = CI->getOperand(2);
 
     if (LHS == RHS)  // memcmp(s,s,x) -> 0
-      return Context->getNullValue(CI->getType());
+      return Constant::getNullValue(CI->getType());
 
     // Make sure we have a constant length.
     ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getOperand(3));
@@ -916,7 +913,7 @@ struct VISIBILITY_HIDDEN MemCmpOpt : public LibCallOptimization {
     uint64_t Len = LenC->getZExtValue();
 
     if (Len == 0) // memcmp(s1,s2,0) -> 0
-      return Context->getNullValue(CI->getType());
+      return Constant::getNullValue(CI->getType());
 
     if (Len == 1) { // memcmp(S1,S2,1) -> *LHS - *RHS
       Value *LHSV = B.CreateLoad(CastToCStr(LHS, B), "lhsv");
@@ -927,8 +924,8 @@ struct VISIBILITY_HIDDEN MemCmpOpt : public LibCallOptimization {
     // memcmp(S1,S2,2) != 0 -> (*(short*)LHS ^ *(short*)RHS)  != 0
     // memcmp(S1,S2,4) != 0 -> (*(int*)LHS ^ *(int*)RHS)  != 0
     if ((Len == 2 || Len == 4) && IsOnlyUsedInZeroEqualityComparison(CI)) {
-      const Type *PTy = Context->getPointerTypeUnqual(Len == 2 ?
-                                               Type::Int16Ty : Type::Int32Ty);
+      const Type *PTy = PointerType::getUnqual(Len == 2 ?
+                       Type::getInt16Ty(*Context) : Type::getInt32Ty(*Context));
       LHS = B.CreateBitCast(LHS, PTy, "tmp");
       RHS = B.CreateBitCast(RHS, PTy, "tmp");
       LoadInst *LHSV = B.CreateLoad(LHS, "lhsv");
@@ -944,13 +941,16 @@ struct VISIBILITY_HIDDEN MemCmpOpt : public LibCallOptimization {
 //===---------------------------------------===//
 // 'memcpy' Optimizations
 
-struct VISIBILITY_HIDDEN MemCpyOpt : public LibCallOptimization {
+struct MemCpyOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // These optimizations require TargetData.
+    if (!TD) return 0;
+
     const FunctionType *FT = Callee->getFunctionType();
     if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
         !isa<PointerType>(FT->getParamType(0)) ||
         !isa<PointerType>(FT->getParamType(1)) ||
-        FT->getParamType(2) != TD->getIntPtrType())
+        FT->getParamType(2) != TD->getIntPtrType(*Context))
       return 0;
 
     // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1)
@@ -962,25 +962,28 @@ struct VISIBILITY_HIDDEN MemCpyOpt : public LibCallOptimization {
 //===---------------------------------------===//
 // 'memmove' Optimizations
 
-struct VISIBILITY_HIDDEN MemMoveOpt : public LibCallOptimization {
+struct MemMoveOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // These optimizations require TargetData.
+    if (!TD) return 0;
+
     const FunctionType *FT = Callee->getFunctionType();
     if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
         !isa<PointerType>(FT->getParamType(0)) ||
         !isa<PointerType>(FT->getParamType(1)) ||
-        FT->getParamType(2) != TD->getIntPtrType())
+        FT->getParamType(2) != TD->getIntPtrType(*Context))
       return 0;
 
     // memmove(x, y, n) -> llvm.memmove(x, y, n, 1)
     Module *M = Caller->getParent();
     Intrinsic::ID IID = Intrinsic::memmove;
     const Type *Tys[1];
-    Tys[0] = TD->getIntPtrType();
+    Tys[0] = TD->getIntPtrType(*Context);
     Value *MemMove = Intrinsic::getDeclaration(M, IID, Tys, 1);
     Value *Dst = CastToCStr(CI->getOperand(1), B);
     Value *Src = CastToCStr(CI->getOperand(2), B);
     Value *Size = CI->getOperand(3);
-    Value *Align = Context->getConstantInt(Type::Int32Ty, 1);
+    Value *Align = ConstantInt::get(Type::getInt32Ty(*Context), 1);
     B.CreateCall4(MemMove, Dst, Src, Size, Align);
     return CI->getOperand(1);
   }
@@ -989,17 +992,21 @@ struct VISIBILITY_HIDDEN MemMoveOpt : public LibCallOptimization {
 //===---------------------------------------===//
 // 'memset' Optimizations
 
-struct VISIBILITY_HIDDEN MemSetOpt : public LibCallOptimization {
+struct MemSetOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // These optimizations require TargetData.
+    if (!TD) return 0;
+
     const FunctionType *FT = Callee->getFunctionType();
     if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
         !isa<PointerType>(FT->getParamType(0)) ||
-        FT->getParamType(1) != TD->getIntPtrType() ||
-        FT->getParamType(2) != TD->getIntPtrType())
+        !isa<IntegerType>(FT->getParamType(1)) ||
+        FT->getParamType(2) != TD->getIntPtrType(*Context))
       return 0;
 
     // memset(p, v, n) -> llvm.memset(p, v, n, 1)
-    Value *Val = B.CreateTrunc(CI->getOperand(2), Type::Int8Ty);
+    Value *Val = B.CreateIntCast(CI->getOperand(2), Type::getInt8Ty(*Context),
+				 false);
     EmitMemSet(CI->getOperand(1), Val,  CI->getOperand(3), B);
     return CI->getOperand(1);
   }
@@ -1012,7 +1019,7 @@ struct VISIBILITY_HIDDEN MemSetOpt : public LibCallOptimization {
 //===---------------------------------------===//
 // 'pow*' Optimizations
 
-struct VISIBILITY_HIDDEN PowOpt : public LibCallOptimization {
+struct PowOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
     const FunctionType *FT = Callee->getFunctionType();
     // Just make sure this has 2 arguments of the same FP type, which match the
@@ -1021,40 +1028,44 @@ struct VISIBILITY_HIDDEN PowOpt : public LibCallOptimization {
         FT->getParamType(0) != FT->getParamType(1) ||
         !FT->getParamType(0)->isFloatingPoint())
       return 0;
-    
+
     Value *Op1 = CI->getOperand(1), *Op2 = CI->getOperand(2);
     if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {
       if (Op1C->isExactlyValue(1.0))  // pow(1.0, x) -> 1.0
         return Op1C;
       if (Op1C->isExactlyValue(2.0))  // pow(2.0, x) -> exp2(x)
-        return EmitUnaryFloatFnCall(Op2, "exp2", B);
+        return EmitUnaryFloatFnCall(Op2, "exp2", B, Callee->getAttributes());
     }
-    
+
     ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2);
     if (Op2C == 0) return 0;
-    
+
     if (Op2C->getValueAPF().isZero())  // pow(x, 0.0) -> 1.0
-      return Context->getConstantFP(CI->getType(), 1.0);
-    
+      return ConstantFP::get(CI->getType(), 1.0);
+
     if (Op2C->isExactlyValue(0.5)) {
-      // FIXME: This is not safe for -0.0 and -inf.  This can only be done when
-      // 'unsafe' math optimizations are allowed.
-      // x    pow(x, 0.5)  sqrt(x)
-      // ---------------------------------------------
-      // -0.0    +0.0       -0.0
-      // -inf    +inf       NaN
-#if 0
-      // pow(x, 0.5) -> sqrt(x)
-      return B.CreateCall(get_sqrt(), Op1, "sqrt");
-#endif
+      // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))).
+      // This is faster than calling pow, and still handles negative zero
+      // and negative infinite correctly.
+      // TODO: In fast-math mode, this could be just sqrt(x).
+      // TODO: In finite-only mode, this could be just fabs(sqrt(x)).
+      Value *Inf = ConstantFP::getInfinity(CI->getType());
+      Value *NegInf = ConstantFP::getInfinity(CI->getType(), true);
+      Value *Sqrt = EmitUnaryFloatFnCall(Op1, "sqrt", B,
+                                         Callee->getAttributes());
+      Value *FAbs = EmitUnaryFloatFnCall(Sqrt, "fabs", B,
+                                         Callee->getAttributes());
+      Value *FCmp = B.CreateFCmpOEQ(Op1, NegInf, "tmp");
+      Value *Sel = B.CreateSelect(FCmp, Inf, FAbs, "tmp");
+      return Sel;
     }
-    
+
     if (Op2C->isExactlyValue(1.0))  // pow(x, 1.0) -> x
       return Op1;
     if (Op2C->isExactlyValue(2.0))  // pow(x, 2.0) -> x*x
       return B.CreateFMul(Op1, Op1, "pow2");
     if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x
-      return B.CreateFDiv(Context->getConstantFP(CI->getType(), 1.0),
+      return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0),
                           Op1, "powrecip");
     return 0;
   }
@@ -1063,7 +1074,7 @@ struct VISIBILITY_HIDDEN PowOpt : public LibCallOptimization {
 //===---------------------------------------===//
 // 'exp2' Optimizations
 
-struct VISIBILITY_HIDDEN Exp2Opt : public LibCallOptimization {
+struct Exp2Opt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
     const FunctionType *FT = Callee->getFunctionType();
     // Just make sure this has 1 argument of FP type, which matches the
@@ -1071,35 +1082,38 @@ struct VISIBILITY_HIDDEN Exp2Opt : public LibCallOptimization {
     if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
         !FT->getParamType(0)->isFloatingPoint())
       return 0;
-    
+
     Value *Op = CI->getOperand(1);
     // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x))  if sizeof(x) <= 32
     // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x))  if sizeof(x) < 32
     Value *LdExpArg = 0;
     if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) {
       if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32)
-        LdExpArg = B.CreateSExt(OpC->getOperand(0), Type::Int32Ty, "tmp");
+        LdExpArg = B.CreateSExt(OpC->getOperand(0),
+				Type::getInt32Ty(*Context), "tmp");
     } else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) {
       if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32)
-        LdExpArg = B.CreateZExt(OpC->getOperand(0), Type::Int32Ty, "tmp");
+        LdExpArg = B.CreateZExt(OpC->getOperand(0),
+				Type::getInt32Ty(*Context), "tmp");
     }
 
     if (LdExpArg) {
       const char *Name;
-      if (Op->getType() == Type::FloatTy)
+      if (Op->getType()->isFloatTy())
         Name = "ldexpf";
-      else if (Op->getType() == Type::DoubleTy)
+      else if (Op->getType()->isDoubleTy())
         Name = "ldexp";
       else
         Name = "ldexpl";
 
-      Constant *One = Context->getConstantFP(APFloat(1.0f));
-      if (Op->getType() != Type::FloatTy)
-        One = Context->getConstantExprFPExtend(One, Op->getType());
+      Constant *One = ConstantFP::get(*Context, APFloat(1.0f));
+      if (!Op->getType()->isFloatTy())
+        One = ConstantExpr::getFPExtend(One, Op->getType());
 
       Module *M = Caller->getParent();
       Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
-                                             Op->getType(), Type::Int32Ty,NULL);
+                                             Op->getType(),
+					     Type::getInt32Ty(*Context),NULL);
       CallInst *CI = B.CreateCall2(Callee, One, LdExpArg);
       if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
         CI->setCallingConv(F->getCallingConv());
@@ -1113,22 +1127,23 @@ struct VISIBILITY_HIDDEN Exp2Opt : public LibCallOptimization {
 //===---------------------------------------===//
 // Double -> Float Shrinking Optimizations for Unary Functions like 'floor'
 
-struct VISIBILITY_HIDDEN UnaryDoubleFPOpt : public LibCallOptimization {
+struct UnaryDoubleFPOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
     const FunctionType *FT = Callee->getFunctionType();
-    if (FT->getNumParams() != 1 || FT->getReturnType() != Type::DoubleTy ||
-        FT->getParamType(0) != Type::DoubleTy)
+    if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() ||
+        !FT->getParamType(0)->isDoubleTy())
       return 0;
 
     // If this is something like 'floor((double)floatval)', convert to floorf.
     FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getOperand(1));
-    if (Cast == 0 || Cast->getOperand(0)->getType() != Type::FloatTy)
+    if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy())
       return 0;
 
     // floor((double)floatval) -> (double)floorf(floatval)
     Value *V = Cast->getOperand(0);
-    V = EmitUnaryFloatFnCall(V, Callee->getNameStart(), B);
-    return B.CreateFPExt(V, Type::DoubleTy);
+    V = EmitUnaryFloatFnCall(V, Callee->getName().data(), B,
+                             Callee->getAttributes());
+    return B.CreateFPExt(V, Type::getDoubleTy(*Context));
   }
 };
 
@@ -1139,54 +1154,56 @@ struct VISIBILITY_HIDDEN UnaryDoubleFPOpt : public LibCallOptimization {
 //===---------------------------------------===//
 // 'ffs*' Optimizations
 
-struct VISIBILITY_HIDDEN FFSOpt : public LibCallOptimization {
+struct FFSOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
     const FunctionType *FT = Callee->getFunctionType();
     // Just make sure this has 2 arguments of the same FP type, which match the
     // result type.
-    if (FT->getNumParams() != 1 || FT->getReturnType() != Type::Int32Ty ||
+    if (FT->getNumParams() != 1 ||
+	FT->getReturnType() != Type::getInt32Ty(*Context) ||
         !isa<IntegerType>(FT->getParamType(0)))
       return 0;
-    
+
     Value *Op = CI->getOperand(1);
-    
+
     // Constant fold.
     if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
       if (CI->getValue() == 0)  // ffs(0) -> 0.
-        return Context->getNullValue(CI->getType());
-      return Context->getConstantInt(Type::Int32Ty, // ffs(c) -> cttz(c)+1
+        return Constant::getNullValue(CI->getType());
+      return ConstantInt::get(Type::getInt32Ty(*Context), // ffs(c) -> cttz(c)+1
                               CI->getValue().countTrailingZeros()+1);
     }
-    
+
     // ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0
     const Type *ArgType = Op->getType();
     Value *F = Intrinsic::getDeclaration(Callee->getParent(),
                                          Intrinsic::cttz, &ArgType, 1);
     Value *V = B.CreateCall(F, Op, "cttz");
-    V = B.CreateAdd(V, Context->getConstantInt(V->getType(), 1), "tmp");
-    V = B.CreateIntCast(V, Type::Int32Ty, false, "tmp");
-    
-    Value *Cond = B.CreateICmpNE(Op, Context->getNullValue(ArgType), "tmp");
-    return B.CreateSelect(Cond, V, Context->getConstantInt(Type::Int32Ty, 0));
+    V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1), "tmp");
+    V = B.CreateIntCast(V, Type::getInt32Ty(*Context), false, "tmp");
+
+    Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType), "tmp");
+    return B.CreateSelect(Cond, V,
+			  ConstantInt::get(Type::getInt32Ty(*Context), 0));
   }
 };
 
 //===---------------------------------------===//
 // 'isdigit' Optimizations
 
-struct VISIBILITY_HIDDEN IsDigitOpt : public LibCallOptimization {
+struct IsDigitOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
     const FunctionType *FT = Callee->getFunctionType();
     // We require integer(i32)
     if (FT->getNumParams() != 1 || !isa<IntegerType>(FT->getReturnType()) ||
-        FT->getParamType(0) != Type::Int32Ty)
+        FT->getParamType(0) != Type::getInt32Ty(*Context))
       return 0;
-    
+
     // isdigit(c) -> (c-'0') <u 10
     Value *Op = CI->getOperand(1);
-    Op = B.CreateSub(Op, Context->getConstantInt(Type::Int32Ty, '0'), 
+    Op = B.CreateSub(Op, ConstantInt::get(Type::getInt32Ty(*Context), '0'),
                      "isdigittmp");
-    Op = B.CreateICmpULT(Op, Context->getConstantInt(Type::Int32Ty, 10), 
+    Op = B.CreateICmpULT(Op, ConstantInt::get(Type::getInt32Ty(*Context), 10),
                          "isdigit");
     return B.CreateZExt(Op, CI->getType());
   }
@@ -1195,58 +1212,58 @@ struct VISIBILITY_HIDDEN IsDigitOpt : public LibCallOptimization {
 //===---------------------------------------===//
 // 'isascii' Optimizations
 
-struct VISIBILITY_HIDDEN IsAsciiOpt : public LibCallOptimization {
+struct IsAsciiOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
     const FunctionType *FT = Callee->getFunctionType();
     // We require integer(i32)
     if (FT->getNumParams() != 1 || !isa<IntegerType>(FT->getReturnType()) ||
-        FT->getParamType(0) != Type::Int32Ty)
+        FT->getParamType(0) != Type::getInt32Ty(*Context))
       return 0;
-    
+
     // isascii(c) -> c <u 128
     Value *Op = CI->getOperand(1);
-    Op = B.CreateICmpULT(Op, Context->getConstantInt(Type::Int32Ty, 128),
+    Op = B.CreateICmpULT(Op, ConstantInt::get(Type::getInt32Ty(*Context), 128),
                          "isascii");
     return B.CreateZExt(Op, CI->getType());
   }
 };
-  
+
 //===---------------------------------------===//
 // 'abs', 'labs', 'llabs' Optimizations
 
-struct VISIBILITY_HIDDEN AbsOpt : public LibCallOptimization {
+struct AbsOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
     const FunctionType *FT = Callee->getFunctionType();
     // We require integer(integer) where the types agree.
     if (FT->getNumParams() != 1 || !isa<IntegerType>(FT->getReturnType()) ||
         FT->getParamType(0) != FT->getReturnType())
       return 0;
-    
+
     // abs(x) -> x >s -1 ? x : -x
     Value *Op = CI->getOperand(1);
-    Value *Pos = B.CreateICmpSGT(Op, 
-                             Context->getConstantIntAllOnesValue(Op->getType()),
+    Value *Pos = B.CreateICmpSGT(Op,
+                             Constant::getAllOnesValue(Op->getType()),
                                  "ispos");
     Value *Neg = B.CreateNeg(Op, "neg");
     return B.CreateSelect(Pos, Op, Neg);
   }
 };
-  
+
 
 //===---------------------------------------===//
 // 'toascii' Optimizations
 
-struct VISIBILITY_HIDDEN ToAsciiOpt : public LibCallOptimization {
+struct ToAsciiOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
     const FunctionType *FT = Callee->getFunctionType();
     // We require i32(i32)
     if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
-        FT->getParamType(0) != Type::Int32Ty)
+        FT->getParamType(0) != Type::getInt32Ty(*Context))
       return 0;
-    
+
     // isascii(c) -> c & 0x7f
     return B.CreateAnd(CI->getOperand(1),
-                       Context->getConstantInt(CI->getType(),0x7F));
+                       ConstantInt::get(CI->getType(),0x7F));
   }
 };
 
@@ -1257,15 +1274,15 @@ struct VISIBILITY_HIDDEN ToAsciiOpt : public LibCallOptimization {
 //===---------------------------------------===//
 // 'printf' Optimizations
 
-struct VISIBILITY_HIDDEN PrintFOpt : public LibCallOptimization {
+struct PrintFOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
     // Require one fixed pointer argument and an integer/void result.
     const FunctionType *FT = Callee->getFunctionType();
     if (FT->getNumParams() < 1 || !isa<PointerType>(FT->getParamType(0)) ||
         !(isa<IntegerType>(FT->getReturnType()) ||
-          FT->getReturnType() == Type::VoidTy))
+          FT->getReturnType()->isVoidTy()))
       return 0;
-    
+
     // Check for a fixed format string.
     std::string FormatStr;
     if (!GetConstantStringInfo(CI->getOperand(1), FormatStr))
@@ -1273,39 +1290,39 @@ struct VISIBILITY_HIDDEN PrintFOpt : public LibCallOptimization {
 
     // Empty format string -> noop.
     if (FormatStr.empty())  // Tolerate printf's declared void.
-      return CI->use_empty() ? (Value*)CI : 
-                               Context->getConstantInt(CI->getType(), 0);
-    
+      return CI->use_empty() ? (Value*)CI :
+                               ConstantInt::get(CI->getType(), 0);
+
     // printf("x") -> putchar('x'), even for '%'.
     if (FormatStr.size() == 1) {
-      EmitPutChar(Context->getConstantInt(Type::Int32Ty, FormatStr[0]), B);
-      return CI->use_empty() ? (Value*)CI : 
-                               Context->getConstantInt(CI->getType(), 1);
+      EmitPutChar(ConstantInt::get(Type::getInt32Ty(*Context), FormatStr[0]), B);
+      return CI->use_empty() ? (Value*)CI :
+                               ConstantInt::get(CI->getType(), 1);
     }
-    
+
     // printf("foo\n") --> puts("foo")
     if (FormatStr[FormatStr.size()-1] == '\n' &&
         FormatStr.find('%') == std::string::npos) {  // no format characters.
       // Create a string literal with no \n on it.  We expect the constant merge
       // pass to be run after this pass, to merge duplicate strings.
       FormatStr.erase(FormatStr.end()-1);
-      Constant *C = Context->getConstantArray(FormatStr, true);
-      C = new GlobalVariable(C->getType(), true,GlobalVariable::InternalLinkage,
-                             C, "str", Callee->getParent());
+      Constant *C = ConstantArray::get(*Context, FormatStr, true);
+      C = new GlobalVariable(*Callee->getParent(), C->getType(), true,
+                             GlobalVariable::InternalLinkage, C, "str");
       EmitPutS(C, B);
-      return CI->use_empty() ? (Value*)CI : 
-                    Context->getConstantInt(CI->getType(), FormatStr.size()+1);
+      return CI->use_empty() ? (Value*)CI :
+                    ConstantInt::get(CI->getType(), FormatStr.size()+1);
     }
-    
+
     // Optimize specific format strings.
     // printf("%c", chr) --> putchar(*(i8*)dst)
     if (FormatStr == "%c" && CI->getNumOperands() > 2 &&
         isa<IntegerType>(CI->getOperand(2)->getType())) {
       EmitPutChar(CI->getOperand(2), B);
-      return CI->use_empty() ? (Value*)CI : 
-                               Context->getConstantInt(CI->getType(), 1);
+      return CI->use_empty() ? (Value*)CI :
+                               ConstantInt::get(CI->getType(), 1);
     }
-    
+
     // printf("%s\n", str) --> puts(str)
     if (FormatStr == "%s\n" && CI->getNumOperands() > 2 &&
         isa<PointerType>(CI->getOperand(2)->getType()) &&
@@ -1320,7 +1337,7 @@ struct VISIBILITY_HIDDEN PrintFOpt : public LibCallOptimization {
 //===---------------------------------------===//
 // 'sprintf' Optimizations
 
-struct VISIBILITY_HIDDEN SPrintFOpt : public LibCallOptimization {
+struct SPrintFOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
     // Require two fixed pointer arguments and an integer result.
     const FunctionType *FT = Callee->getFunctionType();
@@ -1333,7 +1350,7 @@ struct VISIBILITY_HIDDEN SPrintFOpt : public LibCallOptimization {
     std::string FormatStr;
     if (!GetConstantStringInfo(CI->getOperand(2), FormatStr))
       return 0;
-    
+
     // If we just have a format string (nothing else crazy) transform it.
     if (CI->getNumOperands() == 3) {
       // Make sure there's no % in the constant array.  We could try to handle
@@ -1341,41 +1358,49 @@ struct VISIBILITY_HIDDEN SPrintFOpt : public LibCallOptimization {
       for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
         if (FormatStr[i] == '%')
           return 0; // we found a format specifier, bail out.
-      
+
+      // These optimizations require TargetData.
+      if (!TD) return 0;
+
       // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
       EmitMemCpy(CI->getOperand(1), CI->getOperand(2), // Copy the nul byte.
-          Context->getConstantInt(TD->getIntPtrType(), FormatStr.size()+1),1,B);
-      return Context->getConstantInt(CI->getType(), FormatStr.size());
+          ConstantInt::get(TD->getIntPtrType(*Context), FormatStr.size()+1),1,B);
+      return ConstantInt::get(CI->getType(), FormatStr.size());
     }
-    
+
     // The remaining optimizations require the format string to be "%s" or "%c"
     // and have an extra operand.
     if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->getNumOperands() <4)
       return 0;
-    
+
     // Decode the second character of the format string.
     if (FormatStr[1] == 'c') {
       // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
       if (!isa<IntegerType>(CI->getOperand(3)->getType())) return 0;
-      Value *V = B.CreateTrunc(CI->getOperand(3), Type::Int8Ty, "char");
+      Value *V = B.CreateTrunc(CI->getOperand(3),
+			       Type::getInt8Ty(*Context), "char");
       Value *Ptr = CastToCStr(CI->getOperand(1), B);
       B.CreateStore(V, Ptr);
-      Ptr = B.CreateGEP(Ptr, Context->getConstantInt(Type::Int32Ty, 1), "nul");
-      B.CreateStore(Context->getNullValue(Type::Int8Ty), Ptr);
-      
-      return Context->getConstantInt(CI->getType(), 1);
+      Ptr = B.CreateGEP(Ptr, ConstantInt::get(Type::getInt32Ty(*Context), 1),
+			"nul");
+      B.CreateStore(Constant::getNullValue(Type::getInt8Ty(*Context)), Ptr);
+
+      return ConstantInt::get(CI->getType(), 1);
     }
-    
+
     if (FormatStr[1] == 's') {
+      // These optimizations require TargetData.
+      if (!TD) return 0;
+
       // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1)
       if (!isa<PointerType>(CI->getOperand(3)->getType())) return 0;
 
       Value *Len = EmitStrLen(CI->getOperand(3), B);
       Value *IncLen = B.CreateAdd(Len,
-                                  Context->getConstantInt(Len->getType(), 1),
+                                  ConstantInt::get(Len->getType(), 1),
                                   "leninc");
       EmitMemCpy(CI->getOperand(1), CI->getOperand(3), IncLen, 1, B);
-      
+
       // The sprintf result is the unincremented number of bytes in the string.
       return B.CreateIntCast(Len, CI->getType(), false);
     }
@@ -1386,7 +1411,7 @@ struct VISIBILITY_HIDDEN SPrintFOpt : public LibCallOptimization {
 //===---------------------------------------===//
 // 'fwrite' Optimizations
 
-struct VISIBILITY_HIDDEN FWriteOpt : public LibCallOptimization {
+struct FWriteOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
     // Require a pointer, an integer, an integer, a pointer, returning integer.
     const FunctionType *FT = Callee->getFunctionType();
@@ -1396,22 +1421,22 @@ struct VISIBILITY_HIDDEN FWriteOpt : public LibCallOptimization {
         !isa<PointerType>(FT->getParamType(3)) ||
         !isa<IntegerType>(FT->getReturnType()))
       return 0;
-    
+
     // Get the element size and count.
     ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getOperand(2));
     ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getOperand(3));
     if (!SizeC || !CountC) return 0;
     uint64_t Bytes = SizeC->getZExtValue()*CountC->getZExtValue();
-    
+
     // If this is writing zero records, remove the call (it's a noop).
     if (Bytes == 0)
-      return Context->getConstantInt(CI->getType(), 0);
-    
+      return ConstantInt::get(CI->getType(), 0);
+
     // If this is writing one byte, turn it into fputc.
     if (Bytes == 1) {  // fwrite(S,1,1,F) -> fputc(S[0],F)
       Value *Char = B.CreateLoad(CastToCStr(CI->getOperand(1), B), "char");
       EmitFPutC(Char, CI->getOperand(4), B);
-      return Context->getConstantInt(CI->getType(), 1);
+      return ConstantInt::get(CI->getType(), 1);
     }
 
     return 0;
@@ -1421,20 +1446,23 @@ struct VISIBILITY_HIDDEN FWriteOpt : public LibCallOptimization {
 //===---------------------------------------===//
 // 'fputs' Optimizations
 
-struct VISIBILITY_HIDDEN FPutsOpt : public LibCallOptimization {
+struct FPutsOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // These optimizations require TargetData.
+    if (!TD) return 0;
+
     // Require two pointers.  Also, we can't optimize if return value is used.
     const FunctionType *FT = Callee->getFunctionType();
     if (FT->getNumParams() != 2 || !isa<PointerType>(FT->getParamType(0)) ||
         !isa<PointerType>(FT->getParamType(1)) ||
         !CI->use_empty())
       return 0;
-    
+
     // fputs(s,F) --> fwrite(s,1,strlen(s),F)
     uint64_t Len = GetStringLength(CI->getOperand(1));
     if (!Len) return 0;
     EmitFWrite(CI->getOperand(1),
-               Context->getConstantInt(TD->getIntPtrType(), Len-1),
+               ConstantInt::get(TD->getIntPtrType(*Context), Len-1),
                CI->getOperand(2), B);
     return CI;  // Known to have no uses (see above).
   }
@@ -1443,7 +1471,7 @@ struct VISIBILITY_HIDDEN FPutsOpt : public LibCallOptimization {
 //===---------------------------------------===//
 // 'fprintf' Optimizations
 
-struct VISIBILITY_HIDDEN FPrintFOpt : public LibCallOptimization {
+struct FPrintFOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
     // Require two fixed paramters as pointers and integer result.
     const FunctionType *FT = Callee->getFunctionType();
@@ -1451,7 +1479,7 @@ struct VISIBILITY_HIDDEN FPrintFOpt : public LibCallOptimization {
         !isa<PointerType>(FT->getParamType(1)) ||
         !isa<IntegerType>(FT->getReturnType()))
       return 0;
-    
+
     // All the optimizations depend on the format string.
     std::string FormatStr;
     if (!GetConstantStringInfo(CI->getOperand(2), FormatStr))
@@ -1462,26 +1490,29 @@ struct VISIBILITY_HIDDEN FPrintFOpt : public LibCallOptimization {
       for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
         if (FormatStr[i] == '%')  // Could handle %% -> % if we cared.
           return 0; // We found a format specifier.
-      
-      EmitFWrite(CI->getOperand(2), Context->getConstantInt(TD->getIntPtrType(),
+
+      // These optimizations require TargetData.
+      if (!TD) return 0;
+
+      EmitFWrite(CI->getOperand(2), ConstantInt::get(TD->getIntPtrType(*Context),
                                                      FormatStr.size()),
                  CI->getOperand(1), B);
-      return Context->getConstantInt(CI->getType(), FormatStr.size());
+      return ConstantInt::get(CI->getType(), FormatStr.size());
     }
-    
+
     // The remaining optimizations require the format string to be "%s" or "%c"
     // and have an extra operand.
     if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->getNumOperands() <4)
       return 0;
-    
+
     // Decode the second character of the format string.
     if (FormatStr[1] == 'c') {
       // fprintf(F, "%c", chr) --> *(i8*)dst = chr
       if (!isa<IntegerType>(CI->getOperand(3)->getType())) return 0;
       EmitFPutC(CI->getOperand(3), CI->getOperand(1), B);
-      return Context->getConstantInt(CI->getType(), 1);
+      return ConstantInt::get(CI->getType(), 1);
     }
-    
+
     if (FormatStr[1] == 's') {
       // fprintf(F, "%s", str) -> fputs(str, F)
       if (!isa<PointerType>(CI->getOperand(3)->getType()) || !CI->use_empty())
@@ -1502,10 +1533,8 @@ struct VISIBILITY_HIDDEN FPrintFOpt : public LibCallOptimization {
 namespace {
   /// This pass optimizes well known library functions from libc and libm.
   ///
-  class VISIBILITY_HIDDEN SimplifyLibCalls : public FunctionPass {
+  class SimplifyLibCalls : public FunctionPass {
     StringMap<LibCallOptimization*> Optimizations;
-    // Miscellaneous LibCall Optimizations
-    ExitOpt Exit; 
     // String and Memory LibCall Optimizations
     StrCatOpt StrCat; StrNCatOpt StrNCat; StrChrOpt StrChr; StrCmpOpt StrCmp;
     StrNCmpOpt StrNCmp; StrCpyOpt StrCpy; StrNCpyOpt StrNCpy; StrLenOpt StrLen;
@@ -1536,7 +1565,6 @@ namespace {
     bool doInitialization(Module &M);
 
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.addRequired<TargetData>();
     }
   };
   char SimplifyLibCalls::ID = 0;
@@ -1547,15 +1575,12 @@ X("simplify-libcalls", "Simplify well-known library calls");
 
 // Public interface to the Simplify LibCalls pass.
 FunctionPass *llvm::createSimplifyLibCallsPass() {
-  return new SimplifyLibCalls(); 
+  return new SimplifyLibCalls();
 }
 
 /// Optimizations - Populate the Optimizations map with all the optimizations
 /// we know.
 void SimplifyLibCalls::InitOptimizations() {
-  // Miscellaneous LibCall Optimizations
-  Optimizations["exit"] = &Exit;
-  
   // String and Memory LibCall Optimizations
   Optimizations["strcat"] = &StrCat;
   Optimizations["strncat"] = &StrNCat;
@@ -1576,7 +1601,7 @@ void SimplifyLibCalls::InitOptimizations() {
   Optimizations["memcpy"] = &MemCpy;
   Optimizations["memmove"] = &MemMove;
   Optimizations["memset"] = &MemSet;
-  
+
   // Math Library Optimizations
   Optimizations["powf"] = &Pow;
   Optimizations["pow"] = &Pow;
@@ -1594,7 +1619,7 @@ void SimplifyLibCalls::InitOptimizations() {
   Optimizations["llvm.exp2.f80"] = &Exp2;
   Optimizations["llvm.exp2.f64"] = &Exp2;
   Optimizations["llvm.exp2.f32"] = &Exp2;
-  
+
 #ifdef HAVE_FLOORF
   Optimizations["floor"] = &UnaryDoubleFP;
 #endif
@@ -1610,7 +1635,7 @@ void SimplifyLibCalls::InitOptimizations() {
 #ifdef HAVE_NEARBYINTF
   Optimizations["nearbyint"] = &UnaryDoubleFP;
 #endif
-  
+
   // Integer Optimizations
   Optimizations["ffs"] = &FFS;
   Optimizations["ffsl"] = &FFS;
@@ -1621,7 +1646,7 @@ void SimplifyLibCalls::InitOptimizations() {
   Optimizations["isdigit"] = &IsDigit;
   Optimizations["isascii"] = &IsAscii;
   Optimizations["toascii"] = &ToAscii;
-  
+
   // Formatting and IO Optimizations
   Optimizations["sprintf"] = &SPrintF;
   Optimizations["printf"] = &PrintF;
@@ -1636,10 +1661,10 @@ void SimplifyLibCalls::InitOptimizations() {
 bool SimplifyLibCalls::runOnFunction(Function &F) {
   if (Optimizations.empty())
     InitOptimizations();
-  
-  const TargetData &TD = getAnalysis<TargetData>();
-  
-  IRBuilder<> Builder;
+
+  const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+
+  IRBuilder<> Builder(F.getContext());
 
   bool Changed = false;
   for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
@@ -1647,37 +1672,35 @@ bool SimplifyLibCalls::runOnFunction(Function &F) {
       // Ignore non-calls.
       CallInst *CI = dyn_cast<CallInst>(I++);
       if (!CI) continue;
-      
+
       // Ignore indirect calls and calls to non-external functions.
       Function *Callee = CI->getCalledFunction();
       if (Callee == 0 || !Callee->isDeclaration() ||
           !(Callee->hasExternalLinkage() || Callee->hasDLLImportLinkage()))
         continue;
-      
+
       // Ignore unknown calls.
-      const char *CalleeName = Callee->getNameStart();
-      StringMap<LibCallOptimization*>::iterator OMI =
-        Optimizations.find(CalleeName, CalleeName+Callee->getNameLen());
-      if (OMI == Optimizations.end()) continue;
-      
+      LibCallOptimization *LCO = Optimizations.lookup(Callee->getName());
+      if (!LCO) continue;
+
       // Set the builder to the instruction after the call.
       Builder.SetInsertPoint(BB, I);
-      
+
       // Try to optimize this call.
-      Value *Result = OMI->second->OptimizeCall(CI, TD, Builder);
+      Value *Result = LCO->OptimizeCall(CI, TD, Builder);
       if (Result == 0) continue;
 
-      DEBUG(DOUT << "SimplifyLibCalls simplified: " << *CI;
-            DOUT << "  into: " << *Result << "\n");
-      
+      DEBUG(errs() << "SimplifyLibCalls simplified: " << *CI;
+            errs() << "  into: " << *Result << "\n");
+
       // Something changed!
       Changed = true;
       ++NumSimplified;
-      
+
       // Inspect the instruction after the call (which was potentially just
       // added) next.
       I = CI; ++I;
-      
+
       if (CI != Result && !CI->use_empty()) {
         CI->replaceAllUsesWith(Result);
         if (!Result->hasName())
@@ -1736,40 +1759,39 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
     if (!F.isDeclaration())
       continue;
 
-    unsigned NameLen = F.getNameLen();
-    if (!NameLen)
+    if (!F.hasName())
       continue;
 
     const FunctionType *FTy = F.getFunctionType();
 
-    const char *NameStr = F.getNameStart();
-    switch (NameStr[0]) {
+    StringRef Name = F.getName();
+    switch (Name[0]) {
       case 's':
-        if (NameLen == 6 && !strcmp(NameStr, "strlen")) {
+        if (Name == "strlen") {
           if (FTy->getNumParams() != 1 ||
               !isa<PointerType>(FTy->getParamType(0)))
             continue;
           setOnlyReadsMemory(F);
           setDoesNotThrow(F);
           setDoesNotCapture(F, 1);
-        } else if ((NameLen == 6 && !strcmp(NameStr, "strcpy")) ||
-                   (NameLen == 6 && !strcmp(NameStr, "stpcpy")) ||
-                   (NameLen == 6 && !strcmp(NameStr, "strcat")) ||
-                   (NameLen == 6 && !strcmp(NameStr, "strtol")) ||
-                   (NameLen == 6 && !strcmp(NameStr, "strtod")) ||
-                   (NameLen == 6 && !strcmp(NameStr, "strtof")) ||
-                   (NameLen == 7 && !strcmp(NameStr, "strtoul")) ||
-                   (NameLen == 7 && !strcmp(NameStr, "strtoll")) ||
-                   (NameLen == 7 && !strcmp(NameStr, "strtold")) ||
-                   (NameLen == 7 && !strcmp(NameStr, "strncat")) ||
-                   (NameLen == 7 && !strcmp(NameStr, "strncpy")) ||
-                   (NameLen == 8 && !strcmp(NameStr, "strtoull"))) {
+        } else if (Name == "strcpy" ||
+                   Name == "stpcpy" ||
+                   Name == "strcat" ||
+                   Name == "strtol" ||
+                   Name == "strtod" ||
+                   Name == "strtof" ||
+                   Name == "strtoul" ||
+                   Name == "strtoll" ||
+                   Name == "strtold" ||
+                   Name == "strncat" ||
+                   Name == "strncpy" ||
+                   Name == "strtoull") {
           if (FTy->getNumParams() < 2 ||
               !isa<PointerType>(FTy->getParamType(1)))
             continue;
           setDoesNotThrow(F);
           setDoesNotCapture(F, 2);
-        } else if (NameLen == 7 && !strcmp(NameStr, "strxfrm")) {
+        } else if (Name == "strxfrm") {
           if (FTy->getNumParams() != 3 ||
               !isa<PointerType>(FTy->getParamType(0)) ||
               !isa<PointerType>(FTy->getParamType(1)))
@@ -1777,13 +1799,13 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
           setDoesNotThrow(F);
           setDoesNotCapture(F, 1);
           setDoesNotCapture(F, 2);
-        } else if ((NameLen == 6 && !strcmp(NameStr, "strcmp")) ||
-                   (NameLen == 6 && !strcmp(NameStr, "strspn")) ||
-                   (NameLen == 7 && !strcmp(NameStr, "strncmp")) ||
-                   (NameLen == 7 && !strcmp(NameStr, "strcspn")) ||
-                   (NameLen == 7 && !strcmp(NameStr, "strcoll")) ||
-                   (NameLen == 10 && !strcmp(NameStr, "strcasecmp")) ||
-                   (NameLen == 11 && !strcmp(NameStr, "strncasecmp"))) {
+        } else if (Name == "strcmp" ||
+                   Name == "strspn" ||
+                   Name == "strncmp" ||
+                   Name ==" strcspn" ||
+                   Name == "strcoll" ||
+                   Name == "strcasecmp" ||
+                   Name == "strncasecmp") {
           if (FTy->getNumParams() < 2 ||
               !isa<PointerType>(FTy->getParamType(0)) ||
               !isa<PointerType>(FTy->getParamType(1)))
@@ -1792,31 +1814,31 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
           setDoesNotThrow(F);
           setDoesNotCapture(F, 1);
           setDoesNotCapture(F, 2);
-        } else if ((NameLen == 6 && !strcmp(NameStr, "strstr")) ||
-                   (NameLen == 7 && !strcmp(NameStr, "strpbrk"))) {
+        } else if (Name == "strstr" ||
+                   Name == "strpbrk") {
           if (FTy->getNumParams() != 2 ||
               !isa<PointerType>(FTy->getParamType(1)))
             continue;
           setOnlyReadsMemory(F);
           setDoesNotThrow(F);
           setDoesNotCapture(F, 2);
-        } else if ((NameLen == 6 && !strcmp(NameStr, "strtok")) ||
-                   (NameLen == 8 && !strcmp(NameStr, "strtok_r"))) {
+        } else if (Name == "strtok" ||
+                   Name == "strtok_r") {
           if (FTy->getNumParams() < 2 ||
               !isa<PointerType>(FTy->getParamType(1)))
             continue;
           setDoesNotThrow(F);
           setDoesNotCapture(F, 2);
-        } else if ((NameLen == 5 && !strcmp(NameStr, "scanf")) ||
-                   (NameLen == 6 && !strcmp(NameStr, "setbuf")) ||
-                   (NameLen == 7 && !strcmp(NameStr, "setvbuf"))) {
+        } else if (Name == "scanf" ||
+                   Name == "setbuf" ||
+                   Name == "setvbuf") {
           if (FTy->getNumParams() < 1 ||
               !isa<PointerType>(FTy->getParamType(0)))
             continue;
           setDoesNotThrow(F);
           setDoesNotCapture(F, 1);
-        } else if ((NameLen == 6 && !strcmp(NameStr, "strdup")) ||
-                   (NameLen == 7 && !strcmp(NameStr, "strndup"))) {
+        } else if (Name == "strdup" ||
+                   Name == "strndup") {
           if (FTy->getNumParams() < 1 ||
               !isa<PointerType>(FTy->getReturnType()) ||
               !isa<PointerType>(FTy->getParamType(0)))
@@ -1824,10 +1846,10 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
           setDoesNotThrow(F);
           setDoesNotAlias(F, 0);
           setDoesNotCapture(F, 1);
-        } else if ((NameLen == 4 && !strcmp(NameStr, "stat")) ||
-                   (NameLen == 6 && !strcmp(NameStr, "sscanf")) ||
-                   (NameLen == 7 && !strcmp(NameStr, "sprintf")) ||
-                   (NameLen == 7 && !strcmp(NameStr, "statvfs"))) {
+        } else if (Name == "stat" ||
+                   Name == "sscanf" ||
+                   Name == "sprintf" ||
+                   Name == "statvfs") {
           if (FTy->getNumParams() < 2 ||
               !isa<PointerType>(FTy->getParamType(0)) ||
               !isa<PointerType>(FTy->getParamType(1)))
@@ -1835,7 +1857,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
           setDoesNotThrow(F);
           setDoesNotCapture(F, 1);
           setDoesNotCapture(F, 2);
-        } else if (NameLen == 8 && !strcmp(NameStr, "snprintf")) {
+        } else if (Name == "snprintf") {
           if (FTy->getNumParams() != 3 ||
               !isa<PointerType>(FTy->getParamType(0)) ||
               !isa<PointerType>(FTy->getParamType(2)))
@@ -1843,7 +1865,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
           setDoesNotThrow(F);
           setDoesNotCapture(F, 1);
           setDoesNotCapture(F, 3);
-        } else if (NameLen == 9 && !strcmp(NameStr, "setitimer")) {
+        } else if (Name == "setitimer") {
           if (FTy->getNumParams() != 3 ||
               !isa<PointerType>(FTy->getParamType(1)) ||
               !isa<PointerType>(FTy->getParamType(2)))
@@ -1851,7 +1873,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
           setDoesNotThrow(F);
           setDoesNotCapture(F, 2);
           setDoesNotCapture(F, 3);
-        } else if (NameLen == 6 && !strcmp(NameStr, "system")) {
+        } else if (Name == "system") {
           if (FTy->getNumParams() != 1 ||
               !isa<PointerType>(FTy->getParamType(0)))
             continue;
@@ -1860,7 +1882,13 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
         }
         break;
       case 'm':
-        if (NameLen == 6 && !strcmp(NameStr, "memcmp")) {
+        if (Name == "malloc") {
+          if (FTy->getNumParams() != 1 ||
+              !isa<PointerType>(FTy->getReturnType()))
+            continue;
+          setDoesNotThrow(F);
+          setDoesNotAlias(F, 0);
+        } else if (Name == "memcmp") {
           if (FTy->getNumParams() != 3 ||
               !isa<PointerType>(FTy->getParamType(0)) ||
               !isa<PointerType>(FTy->getParamType(1)))
@@ -1869,29 +1897,29 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
           setDoesNotThrow(F);
           setDoesNotCapture(F, 1);
           setDoesNotCapture(F, 2);
-        } else if ((NameLen == 6 && !strcmp(NameStr, "memchr")) ||
-                   (NameLen == 7 && !strcmp(NameStr, "memrchr"))) {
+        } else if (Name == "memchr" ||
+                   Name == "memrchr") {
           if (FTy->getNumParams() != 3)
             continue;
           setOnlyReadsMemory(F);
           setDoesNotThrow(F);
-        } else if ((NameLen == 4 && !strcmp(NameStr, "modf")) ||
-                   (NameLen == 5 && !strcmp(NameStr, "modff")) ||
-                   (NameLen == 5 && !strcmp(NameStr, "modfl")) ||
-                   (NameLen == 6 && !strcmp(NameStr, "memcpy")) ||
-                   (NameLen == 7 && !strcmp(NameStr, "memccpy")) ||
-                   (NameLen == 7 && !strcmp(NameStr, "memmove"))) {
+        } else if (Name == "modf" ||
+                   Name == "modff" ||
+                   Name == "modfl" ||
+                   Name == "memcpy" ||
+                   Name == "memccpy" ||
+                   Name == "memmove") {
           if (FTy->getNumParams() < 2 ||
               !isa<PointerType>(FTy->getParamType(1)))
             continue;
           setDoesNotThrow(F);
           setDoesNotCapture(F, 2);
-        } else if (NameLen == 8 && !strcmp(NameStr, "memalign")) {
+        } else if (Name == "memalign") {
           if (!isa<PointerType>(FTy->getReturnType()))
             continue;
           setDoesNotAlias(F, 0);
-        } else if ((NameLen == 5 && !strcmp(NameStr, "mkdir")) ||
-                   (NameLen == 6 && !strcmp(NameStr, "mktime"))) {
+        } else if (Name == "mkdir" ||
+                   Name == "mktime") {
           if (FTy->getNumParams() == 0 ||
               !isa<PointerType>(FTy->getParamType(0)))
             continue;
@@ -1900,7 +1928,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
         }
         break;
       case 'r':
-        if (NameLen == 7 && !strcmp(NameStr, "realloc")) {
+        if (Name == "realloc") {
           if (FTy->getNumParams() != 2 ||
               !isa<PointerType>(FTy->getParamType(0)) ||
               !isa<PointerType>(FTy->getReturnType()))
@@ -1908,23 +1936,23 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
           setDoesNotThrow(F);
           setDoesNotAlias(F, 0);
           setDoesNotCapture(F, 1);
-        } else if (NameLen == 4 && !strcmp(NameStr, "read")) {
+        } else if (Name == "read") {
           if (FTy->getNumParams() != 3 ||
               !isa<PointerType>(FTy->getParamType(1)))
             continue;
           // May throw; "read" is a valid pthread cancellation point.
           setDoesNotCapture(F, 2);
-        } else if ((NameLen == 5 && !strcmp(NameStr, "rmdir")) ||
-                   (NameLen == 6 && !strcmp(NameStr, "rewind")) ||
-                   (NameLen == 6 && !strcmp(NameStr, "remove")) ||
-                   (NameLen == 8 && !strcmp(NameStr, "realpath"))) {
+        } else if (Name == "rmdir" ||
+                   Name == "rewind" ||
+                   Name == "remove" ||
+                   Name == "realpath") {
           if (FTy->getNumParams() < 1 ||
               !isa<PointerType>(FTy->getParamType(0)))
             continue;
           setDoesNotThrow(F);
           setDoesNotCapture(F, 1);
-        } else if ((NameLen == 6 && !strcmp(NameStr, "rename")) ||
-                   (NameLen == 8 && !strcmp(NameStr, "readlink"))) {
+        } else if (Name == "rename" ||
+                   Name == "readlink") {
           if (FTy->getNumParams() < 2 ||
               !isa<PointerType>(FTy->getParamType(0)) ||
               !isa<PointerType>(FTy->getParamType(1)))
@@ -1935,7 +1963,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
         }
         break;
       case 'w':
-        if (NameLen == 5 && !strcmp(NameStr, "write")) {
+        if (Name == "write") {
           if (FTy->getNumParams() != 3 ||
               !isa<PointerType>(FTy->getParamType(1)))
             continue;
@@ -1944,7 +1972,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
         }
         break;
       case 'b':
-        if (NameLen == 5 && !strcmp(NameStr, "bcopy")) {
+        if (Name == "bcopy") {
           if (FTy->getNumParams() != 3 ||
               !isa<PointerType>(FTy->getParamType(0)) ||
               !isa<PointerType>(FTy->getParamType(1)))
@@ -1952,7 +1980,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
           setDoesNotThrow(F);
           setDoesNotCapture(F, 1);
           setDoesNotCapture(F, 2);
-        } else if (NameLen == 4 && !strcmp(NameStr, "bcmp")) {
+        } else if (Name == "bcmp") {
           if (FTy->getNumParams() != 3 ||
               !isa<PointerType>(FTy->getParamType(0)) ||
               !isa<PointerType>(FTy->getParamType(1)))
@@ -1961,7 +1989,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
           setOnlyReadsMemory(F);
           setDoesNotCapture(F, 1);
           setDoesNotCapture(F, 2);
-        } else if (NameLen == 5 && !strcmp(NameStr, "bzero")) {
+        } else if (Name == "bzero") {
           if (FTy->getNumParams() != 2 ||
               !isa<PointerType>(FTy->getParamType(0)))
             continue;
@@ -1970,17 +1998,17 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
         }
         break;
       case 'c':
-        if (NameLen == 6 && !strcmp(NameStr, "calloc")) {
+        if (Name == "calloc") {
           if (FTy->getNumParams() != 2 ||
               !isa<PointerType>(FTy->getReturnType()))
             continue;
           setDoesNotThrow(F);
           setDoesNotAlias(F, 0);
-        } else if ((NameLen == 5 && !strcmp(NameStr, "chmod")) ||
-                   (NameLen == 5 && !strcmp(NameStr, "chown")) ||
-                   (NameLen == 7 && !strcmp(NameStr, "ctermid")) ||
-                   (NameLen == 8 && !strcmp(NameStr, "clearerr")) ||
-                   (NameLen == 8 && !strcmp(NameStr, "closedir"))) {
+        } else if (Name == "chmod" ||
+                   Name == "chown" ||
+                   Name == "ctermid" ||
+                   Name == "clearerr" ||
+                   Name == "closedir") {
           if (FTy->getNumParams() == 0 ||
               !isa<PointerType>(FTy->getParamType(0)))
             continue;
@@ -1989,17 +2017,17 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
         }
         break;
       case 'a':
-        if ((NameLen == 4 && !strcmp(NameStr, "atoi")) ||
-            (NameLen == 4 && !strcmp(NameStr, "atol")) ||
-            (NameLen == 4 && !strcmp(NameStr, "atof")) ||
-            (NameLen == 5 && !strcmp(NameStr, "atoll"))) {
+        if (Name == "atoi" ||
+            Name == "atol" ||
+            Name == "atof" ||
+            Name == "atoll") {
           if (FTy->getNumParams() != 1 ||
               !isa<PointerType>(FTy->getParamType(0)))
             continue;
           setDoesNotThrow(F);
           setOnlyReadsMemory(F);
           setDoesNotCapture(F, 1);
-        } else if (NameLen == 6 && !strcmp(NameStr, "access")) {
+        } else if (Name == "access") {
           if (FTy->getNumParams() != 2 ||
               !isa<PointerType>(FTy->getParamType(0)))
             continue;
@@ -2008,7 +2036,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
         }
         break;
       case 'f':
-        if (NameLen == 5 && !strcmp(NameStr, "fopen")) {
+        if (Name == "fopen") {
           if (FTy->getNumParams() != 2 ||
               !isa<PointerType>(FTy->getReturnType()) ||
               !isa<PointerType>(FTy->getParamType(0)) ||
@@ -2018,7 +2046,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
           setDoesNotAlias(F, 0);
           setDoesNotCapture(F, 1);
           setDoesNotCapture(F, 2);
-        } else if (NameLen == 6 && !strcmp(NameStr, "fdopen")) {
+        } else if (Name == "fdopen") {
           if (FTy->getNumParams() != 2 ||
               !isa<PointerType>(FTy->getReturnType()) ||
               !isa<PointerType>(FTy->getParamType(1)))
@@ -2026,52 +2054,52 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
           setDoesNotThrow(F);
           setDoesNotAlias(F, 0);
           setDoesNotCapture(F, 2);
-        } else if ((NameLen == 4 && !strcmp(NameStr, "feof")) ||
-                   (NameLen == 4 && !strcmp(NameStr, "free")) ||
-                   (NameLen == 5 && !strcmp(NameStr, "fseek")) ||
-                   (NameLen == 5 && !strcmp(NameStr, "ftell")) ||
-                   (NameLen == 5 && !strcmp(NameStr, "fgetc")) ||
-                   (NameLen == 6 && !strcmp(NameStr, "fseeko")) ||
-                   (NameLen == 6 && !strcmp(NameStr, "ftello")) ||
-                   (NameLen == 6 && !strcmp(NameStr, "fileno")) ||
-                   (NameLen == 6 && !strcmp(NameStr, "fflush")) ||
-                   (NameLen == 6 && !strcmp(NameStr, "fclose")) ||
-                   (NameLen == 7 && !strcmp(NameStr, "fsetpos")) ||
-                   (NameLen == 9 && !strcmp(NameStr, "flockfile")) ||
-                   (NameLen == 11 && !strcmp(NameStr, "funlockfile")) ||
-                   (NameLen == 12 && !strcmp(NameStr, "ftrylockfile"))) {
+        } else if (Name == "feof" ||
+                   Name == "free" ||
+                   Name == "fseek" ||
+                   Name == "ftell" ||
+                   Name == "fgetc" ||
+                   Name == "fseeko" ||
+                   Name == "ftello" ||
+                   Name == "fileno" ||
+                   Name == "fflush" ||
+                   Name == "fclose" ||
+                   Name == "fsetpos" ||
+                   Name == "flockfile" ||
+                   Name == "funlockfile" ||
+                   Name == "ftrylockfile") {
           if (FTy->getNumParams() == 0 ||
               !isa<PointerType>(FTy->getParamType(0)))
             continue;
           setDoesNotThrow(F);
           setDoesNotCapture(F, 1);
-        } else if (NameLen == 6 && !strcmp(NameStr, "ferror")) {
+        } else if (Name == "ferror") {
           if (FTy->getNumParams() != 1 ||
               !isa<PointerType>(FTy->getParamType(0)))
             continue;
           setDoesNotThrow(F);
           setDoesNotCapture(F, 1);
           setOnlyReadsMemory(F);
-        } else if ((NameLen == 5 && !strcmp(NameStr, "fputc")) ||
-                   (NameLen == 5 && !strcmp(NameStr, "fstat")) ||
-                   (NameLen == 5 && !strcmp(NameStr, "frexp")) ||
-                   (NameLen == 6 && !strcmp(NameStr, "frexpf")) ||
-                   (NameLen == 6 && !strcmp(NameStr, "frexpl")) ||
-                   (NameLen == 8 && !strcmp(NameStr, "fstatvfs"))) {
+        } else if (Name == "fputc" ||
+                   Name == "fstat" ||
+                   Name == "frexp" ||
+                   Name == "frexpf" ||
+                   Name == "frexpl" ||
+                   Name == "fstatvfs") {
           if (FTy->getNumParams() != 2 ||
               !isa<PointerType>(FTy->getParamType(1)))
             continue;
           setDoesNotThrow(F);
           setDoesNotCapture(F, 2);
-        } else if (NameLen == 5 && !strcmp(NameStr, "fgets")) {
+        } else if (Name == "fgets") {
           if (FTy->getNumParams() != 3 ||
               !isa<PointerType>(FTy->getParamType(0)) ||
               !isa<PointerType>(FTy->getParamType(2)))
             continue;
           setDoesNotThrow(F);
           setDoesNotCapture(F, 3);
-        } else if ((NameLen == 5 && !strcmp(NameStr, "fread")) ||
-                   (NameLen == 6 && !strcmp(NameStr, "fwrite"))) {
+        } else if (Name == "fread" ||
+                   Name == "fwrite") {
           if (FTy->getNumParams() != 4 ||
               !isa<PointerType>(FTy->getParamType(0)) ||
               !isa<PointerType>(FTy->getParamType(3)))
@@ -2079,10 +2107,10 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
           setDoesNotThrow(F);
           setDoesNotCapture(F, 1);
           setDoesNotCapture(F, 4);
-        } else if ((NameLen == 5 && !strcmp(NameStr, "fputs")) ||
-                   (NameLen == 6 && !strcmp(NameStr, "fscanf")) ||
-                   (NameLen == 7 && !strcmp(NameStr, "fprintf")) ||
-                   (NameLen == 7 && !strcmp(NameStr, "fgetpos"))) {
+        } else if (Name == "fputs" ||
+                   Name == "fscanf" ||
+                   Name == "fprintf" ||
+                   Name == "fgetpos") {
           if (FTy->getNumParams() < 2 ||
               !isa<PointerType>(FTy->getParamType(0)) ||
               !isa<PointerType>(FTy->getParamType(1)))
@@ -2093,31 +2121,31 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
         }
         break;
       case 'g':
-        if ((NameLen == 4 && !strcmp(NameStr, "getc")) ||
-            (NameLen == 10 && !strcmp(NameStr, "getlogin_r")) ||
-            (NameLen == 13 && !strcmp(NameStr, "getc_unlocked"))) {
+        if (Name == "getc" ||
+            Name == "getlogin_r" ||
+            Name == "getc_unlocked") {
           if (FTy->getNumParams() == 0 ||
               !isa<PointerType>(FTy->getParamType(0)))
             continue;
           setDoesNotThrow(F);
           setDoesNotCapture(F, 1);
-        } else if (NameLen == 6 && !strcmp(NameStr, "getenv")) {
+        } else if (Name == "getenv") {
           if (FTy->getNumParams() != 1 ||
               !isa<PointerType>(FTy->getParamType(0)))
             continue;
           setDoesNotThrow(F);
           setOnlyReadsMemory(F);
           setDoesNotCapture(F, 1);
-        } else if ((NameLen == 4 && !strcmp(NameStr, "gets")) ||
-                   (NameLen == 7 && !strcmp(NameStr, "getchar"))) {
+        } else if (Name == "gets" ||
+                   Name == "getchar") {
           setDoesNotThrow(F);
-        } else if (NameLen == 9 && !strcmp(NameStr, "getitimer")) {
+        } else if (Name == "getitimer") {
           if (FTy->getNumParams() != 2 ||
               !isa<PointerType>(FTy->getParamType(1)))
             continue;
           setDoesNotThrow(F);
           setDoesNotCapture(F, 2);
-        } else if (NameLen == 8 && !strcmp(NameStr, "getpwnam")) {
+        } else if (Name == "getpwnam") {
           if (FTy->getNumParams() != 1 ||
               !isa<PointerType>(FTy->getParamType(0)))
             continue;
@@ -2126,22 +2154,22 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
         }
         break;
       case 'u':
-        if (NameLen == 6 && !strcmp(NameStr, "ungetc")) {
+        if (Name == "ungetc") {
           if (FTy->getNumParams() != 2 ||
               !isa<PointerType>(FTy->getParamType(1)))
             continue;
           setDoesNotThrow(F);
           setDoesNotCapture(F, 2);
-        } else if ((NameLen == 5 && !strcmp(NameStr, "uname")) ||
-                   (NameLen == 6 && !strcmp(NameStr, "unlink")) ||
-                   (NameLen == 8 && !strcmp(NameStr, "unsetenv"))) {
+        } else if (Name == "uname" ||
+                   Name == "unlink" ||
+                   Name == "unsetenv") {
           if (FTy->getNumParams() != 1 ||
               !isa<PointerType>(FTy->getParamType(0)))
             continue;
           setDoesNotThrow(F);
           setDoesNotCapture(F, 1);
-        } else if ((NameLen == 5 && !strcmp(NameStr, "utime")) ||
-                   (NameLen == 6 && !strcmp(NameStr, "utimes"))) {
+        } else if (Name == "utime" ||
+                   Name == "utimes") {
           if (FTy->getNumParams() != 2 ||
               !isa<PointerType>(FTy->getParamType(0)) ||
               !isa<PointerType>(FTy->getParamType(1)))
@@ -2152,30 +2180,30 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
         }
         break;
       case 'p':
-        if (NameLen == 4 && !strcmp(NameStr, "putc")) {
+        if (Name == "putc") {
           if (FTy->getNumParams() != 2 ||
               !isa<PointerType>(FTy->getParamType(1)))
             continue;
           setDoesNotThrow(F);
           setDoesNotCapture(F, 2);
-        } else if ((NameLen == 4 && !strcmp(NameStr, "puts")) ||
-                   (NameLen == 6 && !strcmp(NameStr, "printf")) ||
-                   (NameLen == 6 && !strcmp(NameStr, "perror"))) {
+        } else if (Name == "puts" ||
+                   Name == "printf" ||
+                   Name == "perror") {
           if (FTy->getNumParams() != 1 ||
               !isa<PointerType>(FTy->getParamType(0)))
             continue;
           setDoesNotThrow(F);
           setDoesNotCapture(F, 1);
-        } else if ((NameLen == 5 && !strcmp(NameStr, "pread")) ||
-                   (NameLen == 6 && !strcmp(NameStr, "pwrite"))) {
+        } else if (Name == "pread" ||
+                   Name == "pwrite") {
           if (FTy->getNumParams() != 4 ||
               !isa<PointerType>(FTy->getParamType(1)))
             continue;
           // May throw; these are valid pthread cancellation points.
           setDoesNotCapture(F, 2);
-        } else if (NameLen == 7 && !strcmp(NameStr, "putchar")) {
+        } else if (Name == "putchar") {
           setDoesNotThrow(F);
-        } else if (NameLen == 5 && !strcmp(NameStr, "popen")) {
+        } else if (Name == "popen") {
           if (FTy->getNumParams() != 2 ||
               !isa<PointerType>(FTy->getReturnType()) ||
               !isa<PointerType>(FTy->getParamType(0)) ||
@@ -2185,7 +2213,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
           setDoesNotAlias(F, 0);
           setDoesNotCapture(F, 1);
           setDoesNotCapture(F, 2);
-        } else if (NameLen == 6 && !strcmp(NameStr, "pclose")) {
+        } else if (Name == "pclose") {
           if (FTy->getNumParams() != 1 ||
               !isa<PointerType>(FTy->getParamType(0)))
             continue;
@@ -2194,14 +2222,14 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
         }
         break;
       case 'v':
-        if (NameLen == 6 && !strcmp(NameStr, "vscanf")) {
+        if (Name == "vscanf") {
           if (FTy->getNumParams() != 2 ||
               !isa<PointerType>(FTy->getParamType(1)))
             continue;
           setDoesNotThrow(F);
           setDoesNotCapture(F, 1);
-        } else if ((NameLen == 7 && !strcmp(NameStr, "vsscanf")) ||
-                   (NameLen == 7 && !strcmp(NameStr, "vfscanf"))) {
+        } else if (Name == "vsscanf" ||
+                   Name == "vfscanf") {
           if (FTy->getNumParams() != 3 ||
               !isa<PointerType>(FTy->getParamType(1)) ||
               !isa<PointerType>(FTy->getParamType(2)))
@@ -2209,19 +2237,19 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
           setDoesNotThrow(F);
           setDoesNotCapture(F, 1);
           setDoesNotCapture(F, 2);
-        } else if (NameLen == 6 && !strcmp(NameStr, "valloc")) {
+        } else if (Name == "valloc") {
           if (!isa<PointerType>(FTy->getReturnType()))
             continue;
           setDoesNotThrow(F);
           setDoesNotAlias(F, 0);
-        } else if (NameLen == 7 && !strcmp(NameStr, "vprintf")) {
+        } else if (Name == "vprintf") {
           if (FTy->getNumParams() != 2 ||
               !isa<PointerType>(FTy->getParamType(0)))
             continue;
           setDoesNotThrow(F);
           setDoesNotCapture(F, 1);
-        } else if ((NameLen == 8 && !strcmp(NameStr, "vfprintf")) ||
-                   (NameLen == 8 && !strcmp(NameStr, "vsprintf"))) {
+        } else if (Name == "vfprintf" ||
+                   Name == "vsprintf") {
           if (FTy->getNumParams() != 3 ||
               !isa<PointerType>(FTy->getParamType(0)) ||
               !isa<PointerType>(FTy->getParamType(1)))
@@ -2229,7 +2257,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
           setDoesNotThrow(F);
           setDoesNotCapture(F, 1);
           setDoesNotCapture(F, 2);
-        } else if (NameLen == 9 && !strcmp(NameStr, "vsnprintf")) {
+        } else if (Name == "vsnprintf") {
           if (FTy->getNumParams() != 4 ||
               !isa<PointerType>(FTy->getParamType(0)) ||
               !isa<PointerType>(FTy->getParamType(2)))
@@ -2240,13 +2268,13 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
         }
         break;
       case 'o':
-        if (NameLen == 4 && !strcmp(NameStr, "open")) {
+        if (Name == "open") {
           if (FTy->getNumParams() < 2 ||
               !isa<PointerType>(FTy->getParamType(0)))
             continue;
           // May throw; "open" is a valid pthread cancellation point.
           setDoesNotCapture(F, 1);
-        } else if (NameLen == 7 && !strcmp(NameStr, "opendir")) {
+        } else if (Name == "opendir") {
           if (FTy->getNumParams() != 1 ||
               !isa<PointerType>(FTy->getReturnType()) ||
               !isa<PointerType>(FTy->getParamType(0)))
@@ -2257,12 +2285,12 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
         }
         break;
       case 't':
-        if (NameLen == 7 && !strcmp(NameStr, "tmpfile")) {
+        if (Name == "tmpfile") {
           if (!isa<PointerType>(FTy->getReturnType()))
             continue;
           setDoesNotThrow(F);
           setDoesNotAlias(F, 0);
-        } else if (NameLen == 5 && !strcmp(NameStr, "times")) {
+        } else if (Name == "times") {
           if (FTy->getNumParams() != 1 ||
               !isa<PointerType>(FTy->getParamType(0)))
             continue;
@@ -2271,21 +2299,21 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
         }
         break;
       case 'h':
-        if ((NameLen == 5 && !strcmp(NameStr, "htonl")) ||
-            (NameLen == 5 && !strcmp(NameStr, "htons"))) {
+        if (Name == "htonl" ||
+            Name == "htons") {
           setDoesNotThrow(F);
           setDoesNotAccessMemory(F);
         }
         break;
       case 'n':
-        if ((NameLen == 5 && !strcmp(NameStr, "ntohl")) ||
-            (NameLen == 5 && !strcmp(NameStr, "ntohs"))) {
+        if (Name == "ntohl" ||
+            Name == "ntohs") {
           setDoesNotThrow(F);
           setDoesNotAccessMemory(F);
         }
         break;
       case 'l':
-        if (NameLen == 5 && !strcmp(NameStr, "lstat")) {
+        if (Name == "lstat") {
           if (FTy->getNumParams() != 2 ||
               !isa<PointerType>(FTy->getParamType(0)) ||
               !isa<PointerType>(FTy->getParamType(1)))
@@ -2293,7 +2321,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
           setDoesNotThrow(F);
           setDoesNotCapture(F, 1);
           setDoesNotCapture(F, 2);
-        } else if (NameLen == 6 && !strcmp(NameStr, "lchown")) {
+        } else if (Name == "lchown") {
           if (FTy->getNumParams() != 3 ||
               !isa<PointerType>(FTy->getParamType(0)))
             continue;
@@ -2302,7 +2330,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
         }
         break;
       case 'q':
-        if (NameLen == 5 && !strcmp(NameStr, "qsort")) {
+        if (Name == "qsort") {
           if (FTy->getNumParams() != 4 ||
               !isa<PointerType>(FTy->getParamType(3)))
             continue;
@@ -2311,8 +2339,8 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
         }
         break;
       case '_':
-        if ((NameLen == 8 && !strcmp(NameStr, "__strdup")) ||
-            (NameLen == 9 && !strcmp(NameStr, "__strndup"))) {
+        if (Name == "__strdup" ||
+            Name == "__strndup") {
           if (FTy->getNumParams() < 1 ||
               !isa<PointerType>(FTy->getReturnType()) ||
               !isa<PointerType>(FTy->getParamType(0)))
@@ -2320,19 +2348,19 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
           setDoesNotThrow(F);
           setDoesNotAlias(F, 0);
           setDoesNotCapture(F, 1);
-        } else if (NameLen == 10 && !strcmp(NameStr, "__strtok_r")) {
+        } else if (Name == "__strtok_r") {
           if (FTy->getNumParams() != 3 ||
               !isa<PointerType>(FTy->getParamType(1)))
             continue;
           setDoesNotThrow(F);
           setDoesNotCapture(F, 2);
-        } else if (NameLen == 8 && !strcmp(NameStr, "_IO_getc")) {
+        } else if (Name == "_IO_getc") {
           if (FTy->getNumParams() != 1 ||
               !isa<PointerType>(FTy->getParamType(0)))
             continue;
           setDoesNotThrow(F);
           setDoesNotCapture(F, 1);
-        } else if (NameLen == 8 && !strcmp(NameStr, "_IO_putc")) {
+        } else if (Name == "_IO_putc") {
           if (FTy->getNumParams() != 2 ||
               !isa<PointerType>(FTy->getParamType(1)))
             continue;
@@ -2341,16 +2369,16 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
         }
         break;
       case 1:
-        if (NameLen == 15 && !strcmp(NameStr, "\1__isoc99_scanf")) {
+        if (Name == "\1__isoc99_scanf") {
           if (FTy->getNumParams() < 1 ||
               !isa<PointerType>(FTy->getParamType(0)))
             continue;
           setDoesNotThrow(F);
           setDoesNotCapture(F, 1);
-        } else if ((NameLen == 7 && !strcmp(NameStr, "\1stat64")) ||
-                   (NameLen == 8 && !strcmp(NameStr, "\1lstat64")) ||
-                   (NameLen == 10 && !strcmp(NameStr, "\1statvfs64")) ||
-                   (NameLen == 16 && !strcmp(NameStr, "\1__isoc99_sscanf"))) {
+        } else if (Name == "\1stat64" ||
+                   Name == "\1lstat64" ||
+                   Name == "\1statvfs64" ||
+                   Name == "\1__isoc99_sscanf") {
           if (FTy->getNumParams() < 1 ||
               !isa<PointerType>(FTy->getParamType(0)) ||
               !isa<PointerType>(FTy->getParamType(1)))
@@ -2358,7 +2386,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
           setDoesNotThrow(F);
           setDoesNotCapture(F, 1);
           setDoesNotCapture(F, 2);
-        } else if (NameLen == 8 && !strcmp(NameStr, "\1fopen64")) {
+        } else if (Name == "\1fopen64") {
           if (FTy->getNumParams() != 2 ||
               !isa<PointerType>(FTy->getReturnType()) ||
               !isa<PointerType>(FTy->getParamType(0)) ||
@@ -2368,26 +2396,26 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
           setDoesNotAlias(F, 0);
           setDoesNotCapture(F, 1);
           setDoesNotCapture(F, 2);
-        } else if ((NameLen == 9 && !strcmp(NameStr, "\1fseeko64")) ||
-                   (NameLen == 9 && !strcmp(NameStr, "\1ftello64"))) {
+        } else if (Name == "\1fseeko64" ||
+                   Name == "\1ftello64") {
           if (FTy->getNumParams() == 0 ||
               !isa<PointerType>(FTy->getParamType(0)))
             continue;
           setDoesNotThrow(F);
           setDoesNotCapture(F, 1);
-        } else if (NameLen == 10 && !strcmp(NameStr, "\1tmpfile64")) {
+        } else if (Name == "\1tmpfile64") {
           if (!isa<PointerType>(FTy->getReturnType()))
             continue;
           setDoesNotThrow(F);
           setDoesNotAlias(F, 0);
-        } else if ((NameLen == 8 && !strcmp(NameStr, "\1fstat64")) ||
-                   (NameLen == 11 && !strcmp(NameStr, "\1fstatvfs64"))) {
+        } else if (Name == "\1fstat64" ||
+                   Name == "\1fstatvfs64") {
           if (FTy->getNumParams() != 2 ||
               !isa<PointerType>(FTy->getParamType(1)))
             continue;
           setDoesNotThrow(F);
           setDoesNotCapture(F, 2);
-        } else if (NameLen == 7 && !strcmp(NameStr, "\1open64")) {
+        } else if (Name == "\1open64") {
           if (FTy->getNumParams() < 2 ||
               !isa<PointerType>(FTy->getParamType(0)))
             continue;
diff --git a/lib/Transforms/Scalar/TailDuplication.cpp b/lib/Transforms/Scalar/TailDuplication.cpp
index c037ee9..68689d6 100644
--- a/lib/Transforms/Scalar/TailDuplication.cpp
+++ b/lib/Transforms/Scalar/TailDuplication.cpp
@@ -30,8 +30,8 @@
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include <map>
@@ -45,7 +45,7 @@ TailDupThreshold("taildup-threshold",
                  cl::init(1), cl::Hidden);
 
 namespace {
-  class VISIBILITY_HIDDEN TailDup : public FunctionPass {
+  class TailDup : public FunctionPass {
     bool runOnFunction(Function &F);
   public:
     static char ID; // Pass identification, replacement for typeid
@@ -128,7 +128,7 @@ bool TailDup::shouldEliminateUnconditionalBranch(TerminatorInst *TI,
     // other instructions.
     if (isa<CallInst>(I) || isa<InvokeInst>(I)) return false;
 
-    // Allso alloca and malloc.
+    // Also alloca and malloc.
     if (isa<AllocationInst>(I)) return false;
 
     // Some vector instructions can expand into a number of instructions.
@@ -243,13 +243,13 @@ void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) {
   BasicBlock *DestBlock = Branch->getSuccessor(0);
   assert(SourceBlock != DestBlock && "Our predicate is broken!");
 
-  DOUT << "TailDuplication[" << SourceBlock->getParent()->getName()
-       << "]: Eliminating branch: " << *Branch;
+  DEBUG(errs() << "TailDuplication[" << SourceBlock->getParent()->getName()
+        << "]: Eliminating branch: " << *Branch);
 
   // See if we can avoid duplicating code by moving it up to a dominator of both
   // blocks.
   if (BasicBlock *DomBlock = FindObviousSharedDomOf(SourceBlock, DestBlock)) {
-    DOUT << "Found shared dominator: " << DomBlock->getName() << "\n";
+    DEBUG(errs() << "Found shared dominator: " << DomBlock->getName() << "\n");
 
     // If there are non-phi instructions in DestBlock that have no operands
     // defined in DestBlock, and if the instruction has no side effects, we can
@@ -258,7 +258,8 @@ void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) {
     while (!isa<TerminatorInst>(BBI)) {
       Instruction *I = BBI++;
 
-      bool CanHoist = !I->isTrapping() && !I->mayHaveSideEffects();
+      bool CanHoist = I->isSafeToSpeculativelyExecute() &&
+                      !I->mayReadFromMemory();
       if (CanHoist) {
         for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op)
           if (Instruction *OpI = dyn_cast<Instruction>(I->getOperand(op)))
@@ -271,7 +272,7 @@ void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) {
           // Remove from DestBlock, move right before the term in DomBlock.
           DestBlock->getInstList().remove(I);
           DomBlock->getInstList().insert(DomBlock->getTerminator(), I);
-          DOUT << "Hoisted: " << *I;
+          DEBUG(errs() << "Hoisted: " << *I);
         }
       }
     }
@@ -358,7 +359,8 @@ void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) {
       Instruction *Inst = BI++;
       if (isInstructionTriviallyDead(Inst))
         Inst->eraseFromParent();
-      else if (Constant *C = ConstantFoldInstruction(Inst)) {
+      else if (Constant *C = ConstantFoldInstruction(Inst,
+                                                     Inst->getContext())) {
         Inst->replaceAllUsesWith(C);
         Inst->eraseFromParent();
       }
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 34ee57c..b56e170 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -60,14 +60,13 @@
 #include "llvm/Pass.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/Support/Compiler.h"
 using namespace llvm;
 
 STATISTIC(NumEliminated, "Number of tail calls removed");
 STATISTIC(NumAccumAdded, "Number of accumulators introduced");
 
 namespace {
-  struct VISIBILITY_HIDDEN TailCallElim : public FunctionPass {
+  struct TailCallElim : public FunctionPass {
     static char ID; // Pass identification, replacement for typeid
     TailCallElim() : FunctionPass(&ID) {}
 
@@ -394,7 +393,7 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
   // create the new entry block, allowing us to branch back to the old entry.
   if (OldEntry == 0) {
     OldEntry = &F->getEntryBlock();
-    BasicBlock *NewEntry = BasicBlock::Create("", F, OldEntry);
+    BasicBlock *NewEntry = BasicBlock::Create(F->getContext(), "", F, OldEntry);
     NewEntry->takeName(OldEntry);
     OldEntry->setName("tailrecurse");
     BranchInst::Create(OldEntry, NewEntry);
diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp
index 71049fa..135a621 100644
--- a/lib/Transforms/Utils/AddrModeMatcher.cpp
+++ b/lib/Transforms/Utils/AddrModeMatcher.cpp
@@ -19,17 +19,18 @@
 #include "llvm/Target/TargetData.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/PatternMatch.h"
+#include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 using namespace llvm::PatternMatch;
 
-void ExtAddrMode::print(OStream &OS) const {
+void ExtAddrMode::print(raw_ostream &OS) const {
   bool NeedPlus = false;
   OS << "[";
   if (BaseGV) {
     OS << (NeedPlus ? " + " : "")
        << "GV:";
-    WriteAsOperand(*OS.stream(), BaseGV, /*PrintType=*/false);
+    WriteAsOperand(OS, BaseGV, /*PrintType=*/false);
     NeedPlus = true;
   }
 
@@ -39,13 +40,13 @@ void ExtAddrMode::print(OStream &OS) const {
   if (BaseReg) {
     OS << (NeedPlus ? " + " : "")
        << "Base:";
-    WriteAsOperand(*OS.stream(), BaseReg, /*PrintType=*/false);
+    WriteAsOperand(OS, BaseReg, /*PrintType=*/false);
     NeedPlus = true;
   }
   if (Scale) {
     OS << (NeedPlus ? " + " : "")
        << Scale << "*";
-    WriteAsOperand(*OS.stream(), ScaledReg, /*PrintType=*/false);
+    WriteAsOperand(OS, ScaledReg, /*PrintType=*/false);
     NeedPlus = true;
   }
 
@@ -53,8 +54,8 @@ void ExtAddrMode::print(OStream &OS) const {
 }
 
 void ExtAddrMode::dump() const {
-  print(cerr);
-  cerr << '\n';
+  print(errs());
+  errs() << '\n';
 }
 
 
@@ -205,7 +206,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
     if (!RHS) return false;
     int64_t Scale = RHS->getSExtValue();
     if (Opcode == Instruction::Shl)
-      Scale = 1 << Scale;
+      Scale = 1LL << Scale;
     
     return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth);
   }
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 6d1180d..4931ab3 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -16,6 +16,7 @@
 #include "llvm/Function.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Constant.h"
 #include "llvm/Type.h"
 #include "llvm/Analysis/AliasAnalysis.h"
@@ -23,6 +24,8 @@
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/ValueHandle.h"
 #include <algorithm>
 using namespace llvm;
@@ -249,11 +252,11 @@ void llvm::RemoveSuccessor(TerminatorInst *TI, unsigned SuccNum) {
       Value *RetVal = 0;
 
       // Create a value to return... if the function doesn't return null...
-      if (BB->getParent()->getReturnType() != Type::VoidTy)
+      if (BB->getParent()->getReturnType() != Type::getVoidTy(TI->getContext()))
         RetVal = Constant::getNullValue(BB->getParent()->getReturnType());
 
       // Create the return...
-      NewTI = ReturnInst::Create(RetVal);
+      NewTI = ReturnInst::Create(TI->getContext(), RetVal);
     }
     break;
 
@@ -261,8 +264,7 @@ void llvm::RemoveSuccessor(TerminatorInst *TI, unsigned SuccNum) {
   case Instruction::Switch:    // Should remove entry
   default:
   case Instruction::Ret:       // Cannot happen, has no successors!
-    assert(0 && "Unhandled terminator instruction type in RemoveSuccessor!");
-    abort();
+    llvm_unreachable("Unhandled terminator instruction type in RemoveSuccessor!");
   }
 
   if (NewTI)   // If it's a different instruction, replace.
@@ -318,7 +320,8 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {
     ++SplitIt;
   BasicBlock *New = Old->splitBasicBlock(SplitIt, Old->getName()+".split");
 
-  // The new block lives in whichever loop the old one did.
+  // The new block lives in whichever loop the old one did. This preserves
+  // LCSSA as well, because we force the split point to be after any PHI nodes.
   if (LoopInfo* LI = P->getAnalysisIfAvailable<LoopInfo>())
     if (Loop *L = LI->getLoopFor(Old))
       L->addBasicBlockToLoop(New, LI->getBase());
@@ -352,32 +355,61 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {
 /// Preds array, which has NumPreds elements in it.  The new block is given a
 /// suffix of 'Suffix'.
 ///
-/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree and
-/// DominanceFrontier, but no other analyses.
+/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree,
+/// DominanceFrontier, LoopInfo, and LCCSA but no other analyses.
+/// In particular, it does not preserve LoopSimplify (because it's
+/// complicated to handle the case where one of the edges being split
+/// is an exit of a loop with other exits).
+///
 BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, 
                                          BasicBlock *const *Preds,
                                          unsigned NumPreds, const char *Suffix,
                                          Pass *P) {
   // Create new basic block, insert right before the original block.
-  BasicBlock *NewBB =
-    BasicBlock::Create(BB->getName()+Suffix, BB->getParent(), BB);
+  BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix,
+                                         BB->getParent(), BB);
   
   // The new block unconditionally branches to the old block.
   BranchInst *BI = BranchInst::Create(BB, NewBB);
   
+  LoopInfo *LI = P ? P->getAnalysisIfAvailable<LoopInfo>() : 0;
+  Loop *L = LI ? LI->getLoopFor(BB) : 0;
+  bool PreserveLCSSA = P->mustPreserveAnalysisID(LCSSAID);
+
   // Move the edges from Preds to point to NewBB instead of BB.
-  for (unsigned i = 0; i != NumPreds; ++i)
+  // While here, if we need to preserve loop analyses, collect
+  // some information about how this split will affect loops.
+  bool HasLoopExit = false;
+  bool IsLoopEntry = !!L;
+  bool SplitMakesNewLoopHeader = false;
+  for (unsigned i = 0; i != NumPreds; ++i) {
     Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB);
-  
+
+    if (LI) {
+      // If we need to preserve LCSSA, determine if any of
+      // the preds is a loop exit.
+      if (PreserveLCSSA)
+        if (Loop *PL = LI->getLoopFor(Preds[i]))
+          if (!PL->contains(BB))
+            HasLoopExit = true;
+      // If we need to preserve LoopInfo, note whether any of the
+      // preds crosses an interesting loop boundary.
+      if (L) {
+        if (L->contains(Preds[i]))
+          IsLoopEntry = false;
+        else
+          SplitMakesNewLoopHeader = true;
+      }
+    }
+  }
+
   // Update dominator tree and dominator frontier if available.
   DominatorTree *DT = P ? P->getAnalysisIfAvailable<DominatorTree>() : 0;
   if (DT)
     DT->splitBlock(NewBB);
   if (DominanceFrontier *DF = P ? P->getAnalysisIfAvailable<DominanceFrontier>():0)
     DF->splitBlock(NewBB);
-  AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0;
-  
-  
+
   // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI
   // node becomes an incoming value for BB's phi node.  However, if the Preds
   // list is empty, we need to insert dummy entries into the PHI nodes in BB to
@@ -388,20 +420,42 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
       cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB);
     return NewBB;
   }
+
+  AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0;
+
+  if (L) {
+    if (IsLoopEntry) {
+      if (Loop *PredLoop = LI->getLoopFor(Preds[0])) {
+        // Add the new block to the nearest enclosing loop (and not an
+        // adjacent loop).
+        while (PredLoop && !PredLoop->contains(BB))
+          PredLoop = PredLoop->getParentLoop();
+        if (PredLoop)
+          PredLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+      }
+    } else {
+      L->addBasicBlockToLoop(NewBB, LI->getBase());
+      if (SplitMakesNewLoopHeader)
+        L->moveToHeader(NewBB);
+    }
+  }
   
   // Otherwise, create a new PHI node in NewBB for each PHI node in BB.
   for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ) {
     PHINode *PN = cast<PHINode>(I++);
     
     // Check to see if all of the values coming in are the same.  If so, we
-    // don't need to create a new PHI node.
-    Value *InVal = PN->getIncomingValueForBlock(Preds[0]);
-    for (unsigned i = 1; i != NumPreds; ++i)
-      if (InVal != PN->getIncomingValueForBlock(Preds[i])) {
-        InVal = 0;
-        break;
-      }
-    
+    // don't need to create a new PHI node, unless it's needed for LCSSA.
+    Value *InVal = 0;
+    if (!HasLoopExit) {
+      InVal = PN->getIncomingValueForBlock(Preds[0]);
+      for (unsigned i = 1; i != NumPreds; ++i)
+        if (InVal != PN->getIncomingValueForBlock(Preds[i])) {
+          InVal = 0;
+          break;
+        }
+    }
+
     if (InVal) {
       // If all incoming values for the new PHI would be the same, just don't
       // make a new PHI.  Instead, just remove the incoming values from the old
@@ -426,16 +480,6 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
     // Add an incoming value to the PHI node in the loop for the preheader
     // edge.
     PN->addIncoming(InVal, NewBB);
-    
-    // Check to see if we can eliminate this phi node.
-    if (Value *V = PN->hasConstantValue(DT != 0)) {
-      Instruction *I = dyn_cast<Instruction>(V);
-      if (!I || DT == 0 || DT->dominates(I, PN)) {
-        PN->replaceAllUsesWith(V);
-        if (AA) AA->deleteValue(PN);
-        PN->eraseFromParent();
-      }
-    }
   }
   
   return NewBB;
@@ -503,11 +547,15 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
   // Test if the values are trivially equivalent.
   if (A == B) return true;
   
-  // Test if the values come form identical arithmetic instructions.
+  // Test if the values come from identical arithmetic instructions.
+  // Use isIdenticalToWhenDefined instead of isIdenticalTo because
+  // this function is only used when one address use dominates the
+  // other, which means that they'll always either have the same
+  // value or one of them will have an undefined value.
   if (isa<BinaryOperator>(A) || isa<CastInst>(A) ||
       isa<PHINode>(A) || isa<GetElementPtrInst>(A))
     if (const Instruction *BI = dyn_cast<Instruction>(B))
-      if (cast<Instruction>(A)->isIdenticalTo(BI))
+      if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
         return true;
   
   // Otherwise they may not be equivalent.
@@ -537,7 +585,7 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
   unsigned AccessSize = 0;
   if (AA) {
     const Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType();
-    AccessSize = AA->getTargetData().getTypeStoreSizeInBits(AccessTy);
+    AccessSize = AA->getTypeStoreSize(AccessTy);
   }
   
   while (ScanFrom != ScanBB->begin()) {
diff --git a/lib/Transforms/Utils/BasicInliner.cpp b/lib/Transforms/Utils/BasicInliner.cpp
index 1650cfa..4b720b1 100644
--- a/lib/Transforms/Utils/BasicInliner.cpp
+++ b/lib/Transforms/Utils/BasicInliner.cpp
@@ -13,7 +13,6 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "basicinliner"
-
 #include "llvm/Module.h"
 #include "llvm/Function.h"
 #include "llvm/Transforms/Utils/BasicInliner.h"
@@ -21,6 +20,7 @@
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include <vector>
 
@@ -89,7 +89,7 @@ void BasicInlinerImpl::inlineFunctions() {
       }
   }
   
-  DOUT << ": " << CallSites.size() << " call sites.\n";
+  DEBUG(errs() << ": " << CallSites.size() << " call sites.\n");
   
   // Inline call sites.
   bool Changed = false;
@@ -109,22 +109,22 @@ void BasicInlinerImpl::inlineFunctions() {
         }
         InlineCost IC = CA.getInlineCost(CS, NeverInline);
         if (IC.isAlways()) {        
-          DOUT << "  Inlining: cost=always"
-               <<", call: " << *CS.getInstruction();
+          DEBUG(errs() << "  Inlining: cost=always"
+                       <<", call: " << *CS.getInstruction());
         } else if (IC.isNever()) {
-          DOUT << "  NOT Inlining: cost=never"
-               <<", call: " << *CS.getInstruction();
+          DEBUG(errs() << "  NOT Inlining: cost=never"
+                       <<", call: " << *CS.getInstruction());
           continue;
         } else {
           int Cost = IC.getValue();
           
           if (Cost >= (int) BasicInlineThreshold) {
-            DOUT << "  NOT Inlining: cost = " << Cost
-                 << ", call: " <<  *CS.getInstruction();
+            DEBUG(errs() << "  NOT Inlining: cost = " << Cost
+                         << ", call: " <<  *CS.getInstruction());
             continue;
           } else {
-            DOUT << "  Inlining: cost = " << Cost
-                 << ", call: " <<  *CS.getInstruction();
+            DEBUG(errs() << "  Inlining: cost = " << Cost
+                         << ", call: " <<  *CS.getInstruction());
           }
         }
         
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index c4fd1ea..849b2b5 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -21,11 +21,13 @@
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ProfileInfo.h"
 #include "llvm/Function.h"
 #include "llvm/Instructions.h"
 #include "llvm/Type.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 using namespace llvm;
@@ -43,6 +45,7 @@ namespace {
       AU.addPreserved<DominatorTree>();
       AU.addPreserved<DominanceFrontier>();
       AU.addPreserved<LoopInfo>();
+      AU.addPreserved<ProfileInfo>();
 
       // No loop canonicalization guarantees are broken by this pass.
       AU.addPreservedID(LoopSimplifyID);
@@ -114,6 +117,38 @@ bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum,
   return false;
 }
 
+/// CreatePHIsForSplitLoopExit - When a loop exit edge is split, LCSSA form
+/// may require new PHIs in the new exit block. This function inserts the
+/// new PHIs, as needed.  Preds is a list of preds inside the loop, SplitBB
+/// is the new loop exit block, and DestBB is the old loop exit, now the
+/// successor of SplitBB.
+static void CreatePHIsForSplitLoopExit(SmallVectorImpl<BasicBlock *> &Preds,
+                                       BasicBlock *SplitBB,
+                                       BasicBlock *DestBB) {
+  // SplitBB shouldn't have anything non-trivial in it yet.
+  assert(SplitBB->getFirstNonPHI() == SplitBB->getTerminator() &&
+         "SplitBB has non-PHI nodes!");
+
+  // For each PHI in the destination block...
+  for (BasicBlock::iterator I = DestBB->begin();
+       PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+    unsigned Idx = PN->getBasicBlockIndex(SplitBB);
+    Value *V = PN->getIncomingValue(Idx);
+    // If the input is a PHI which already satisfies LCSSA, don't create
+    // a new one.
+    if (const PHINode *VP = dyn_cast<PHINode>(V))
+      if (VP->getParent() == SplitBB)
+        continue;
+    // Otherwise a new PHI is needed. Create one and populate it.
+    PHINode *NewPN = PHINode::Create(PN->getType(), "split",
+                                     SplitBB->getTerminator());
+    for (unsigned i = 0, e = Preds.size(); i != e; ++i)
+      NewPN->addIncoming(V, Preds[i]);
+    // Update the original PHI.
+    PN->setIncomingValue(Idx, NewPN);
+  }
+}
+
 /// SplitCriticalEdge - If this edge is a critical edge, insert a new node to
 /// split the critical edge.  This will update DominatorTree and
 /// DominatorFrontier  information if it is available, thus calling this pass
@@ -121,15 +156,15 @@ bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum,
 /// false otherwise.  This ensures that all edges to that dest go to one block
 /// instead of each going to a different block.
 //
-bool llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Pass *P,
-                             bool MergeIdenticalEdges) {
-  if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return false;
+BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
+                                    Pass *P, bool MergeIdenticalEdges) {
+  if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return 0;
   BasicBlock *TIBB = TI->getParent();
   BasicBlock *DestBB = TI->getSuccessor(SuccNum);
 
   // Create a new basic block, linking it into the CFG.
-  BasicBlock *NewBB = BasicBlock::Create(TIBB->getName() + "." +
-                                         DestBB->getName() + "_crit_edge");
+  BasicBlock *NewBB = BasicBlock::Create(TI->getContext(),
+                      TIBB->getName() + "." + DestBB->getName() + "_crit_edge");
   // Create our unconditional branch...
   BranchInst::Create(DestBB, NewBB);
 
@@ -171,7 +206,7 @@ bool llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Pass *P,
   
 
   // If we don't have a pass object, we can't update anything...
-  if (P == 0) return true;
+  if (P == 0) return NewBB;
 
   // Now update analysis information.  Since the only predecessor of NewBB is
   // the TIBB, TIBB clearly dominates NewBB.  TIBB usually doesn't dominate
@@ -222,8 +257,8 @@ bool llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Pass *P,
     // If NewBBDominatesDestBB hasn't been computed yet, do so with DF.
     if (!OtherPreds.empty()) {
       // FIXME: IMPLEMENT THIS!
-      assert(0 && "Requiring domfrontiers but not idom/domtree/domset."
-             " not implemented yet!");
+      llvm_unreachable("Requiring domfrontiers but not idom/domtree/domset."
+                       " not implemented yet!");
     }
     
     // Since the new block is dominated by its only predecessor TIBB,
@@ -253,9 +288,9 @@ bool llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Pass *P,
   
   // Update LoopInfo if it is around.
   if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>()) {
-    // If one or the other blocks were not in a loop, the new block is not
-    // either, and thus LI doesn't need to be updated.
-    if (Loop *TIL = LI->getLoopFor(TIBB))
+    if (Loop *TIL = LI->getLoopFor(TIBB)) {
+      // If one or the other blocks were not in a loop, the new block is not
+      // either, and thus LI doesn't need to be updated.
       if (Loop *DestLoop = LI->getLoopFor(DestBB)) {
         if (TIL == DestLoop) {
           // Both in the same loop, the NewBB joins loop.
@@ -277,6 +312,65 @@ bool llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Pass *P,
             P->addBasicBlockToLoop(NewBB, LI->getBase());
         }
       }
+      // If TIBB is in a loop and DestBB is outside of that loop, split the
+      // other exit blocks of the loop that also have predecessors outside
+      // the loop, to maintain a LoopSimplify guarantee.
+      if (!TIL->contains(DestBB) &&
+          P->mustPreserveAnalysisID(LoopSimplifyID)) {
+        assert(!TIL->contains(NewBB) &&
+               "Split point for loop exit is contained in loop!");
+
+        // Update LCSSA form in the newly created exit block.
+        if (P->mustPreserveAnalysisID(LCSSAID)) {
+          SmallVector<BasicBlock *, 1> OrigPred;
+          OrigPred.push_back(TIBB);
+          CreatePHIsForSplitLoopExit(OrigPred, NewBB, DestBB);
+        }
+
+        // For each unique exit block...
+        SmallVector<BasicBlock *, 4> ExitBlocks;
+        TIL->getExitBlocks(ExitBlocks);
+        for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+          // Collect all the preds that are inside the loop, and note
+          // whether there are any preds outside the loop.
+          SmallVector<BasicBlock *, 4> Preds;
+          bool HasPredOutsideOfLoop = false;
+          BasicBlock *Exit = ExitBlocks[i];
+          for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit);
+               I != E; ++I)
+            if (TIL->contains(*I))
+              Preds.push_back(*I);
+            else
+              HasPredOutsideOfLoop = true;
+          // If there are any preds not in the loop, we'll need to split
+          // the edges. The Preds.empty() check is needed because a block
+          // may appear multiple times in the list. We can't use
+          // getUniqueExitBlocks above because that depends on LoopSimplify
+          // form, which we're in the process of restoring!
+          if (!Preds.empty() && HasPredOutsideOfLoop) {
+            BasicBlock *NewExitBB =
+              SplitBlockPredecessors(Exit, Preds.data(), Preds.size(),
+                                     "split", P);
+            if (P->mustPreserveAnalysisID(LCSSAID))
+              CreatePHIsForSplitLoopExit(Preds, NewExitBB, Exit);
+          }
+        }
+      }
+      // LCSSA form was updated above for the case where LoopSimplify is
+      // available, which means that all predecessors of loop exit blocks
+      // are within the loop. Without LoopSimplify form, it would be
+      // necessary to insert a new phi.
+      assert((!P->mustPreserveAnalysisID(LCSSAID) ||
+              P->mustPreserveAnalysisID(LoopSimplifyID)) &&
+             "SplitCriticalEdge doesn't know how to update LCCSA form "
+             "without LoopSimplify!");
+    }
   }
-  return true;
+
+  // Update ProfileInfo if it is around.
+  if (ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>()) {
+    PI->splitEdge(TIBB,DestBB,NewBB,MergeIdenticalEdges);
+  }
+
+  return NewBB;
 }
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index 10cae5c..f4394ea 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -6,11 +6,10 @@ add_llvm_library(LLVMTransformUtils
   CloneFunction.cpp
   CloneLoop.cpp
   CloneModule.cpp
-  CloneTrace.cpp
   CodeExtractor.cpp
   DemoteRegToStack.cpp
-  InlineCost.cpp
   InlineFunction.cpp
+  InstructionNamer.cpp
   LCSSA.cpp
   Local.cpp
   LoopSimplify.cpp
@@ -19,12 +18,12 @@ add_llvm_library(LLVMTransformUtils
   LowerSwitch.cpp
   Mem2Reg.cpp
   PromoteMemoryToRegister.cpp
-  SimplifyCFG.cpp
+  SSAUpdater.cpp
   SSI.cpp
+  SimplifyCFG.cpp
   UnifyFunctionExitNodes.cpp
   UnrollLoop.cpp
   ValueMapper.cpp
-  InstructionNamer.cpp
   )
 
 target_link_libraries (LLVMTransformUtils LLVMSupport)
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index d0fdefa..30130fa 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -20,6 +20,7 @@
 #include "llvm/IntrinsicInst.h"
 #include "llvm/GlobalVariable.h"
 #include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
@@ -34,7 +35,7 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB,
                                   DenseMap<const Value*, Value*> &ValueMap,
                                   const char *NameSuffix, Function *F,
                                   ClonedCodeInfo *CodeInfo) {
-  BasicBlock *NewBB = BasicBlock::Create("", F);
+  BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F);
   if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix);
 
   bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false;
@@ -72,7 +73,7 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB,
 //
 void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
                              DenseMap<const Value*, Value*> &ValueMap,
-                             std::vector<ReturnInst*> &Returns,
+                             SmallVectorImpl<ReturnInst*> &Returns,
                              const char *NameSuffix, ClonedCodeInfo *CodeInfo) {
   assert(NameSuffix && "NameSuffix cannot be null!");
 
@@ -165,7 +166,7 @@ Function *llvm::CloneFunction(const Function *F,
       ValueMap[I] = DestI++;        // Add mapping to ValueMap
     }
 
-  std::vector<ReturnInst*> Returns;  // Ignore returns cloned...
+  SmallVector<ReturnInst*, 8> Returns;  // Ignore returns cloned.
   CloneFunctionInto(NewF, F, ValueMap, Returns, "", CodeInfo);
   return NewF;
 }
@@ -179,7 +180,7 @@ namespace {
     Function *NewFunc;
     const Function *OldFunc;
     DenseMap<const Value*, Value*> &ValueMap;
-    std::vector<ReturnInst*> &Returns;
+    SmallVectorImpl<ReturnInst*> &Returns;
     const char *NameSuffix;
     ClonedCodeInfo *CodeInfo;
     const TargetData *TD;
@@ -187,7 +188,7 @@ namespace {
   public:
     PruningFunctionCloner(Function *newFunc, const Function *oldFunc,
                           DenseMap<const Value*, Value*> &valueMap,
-                          std::vector<ReturnInst*> &returns,
+                          SmallVectorImpl<ReturnInst*> &returns,
                           const char *nameSuffix, 
                           ClonedCodeInfo *codeInfo,
                           const TargetData *td)
@@ -218,7 +219,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
   
   // Nope, clone it now.
   BasicBlock *NewBB;
-  BBEntry = NewBB = BasicBlock::Create();
+  BBEntry = NewBB = BasicBlock::Create(BB->getContext());
   if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix);
 
   bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false;
@@ -237,7 +238,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
     // Do not clone llvm.dbg.region.end. It will be adjusted by the inliner.
     if (const DbgFuncStartInst *DFSI = dyn_cast<DbgFuncStartInst>(II)) {
       if (DbgFnStart == NULL) {
-        DISubprogram SP(cast<GlobalVariable>(DFSI->getSubprogram()));
+        DISubprogram SP(DFSI->getSubprogram());
         if (SP.describes(BB->getParent()))
           DbgFnStart = DFSI->getSubprogram();
       }
@@ -323,17 +324,21 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
 /// mapping its operands through ValueMap if they are available.
 Constant *PruningFunctionCloner::
 ConstantFoldMappedInstruction(const Instruction *I) {
+  LLVMContext &Context = I->getContext();
+  
   SmallVector<Constant*, 8> Ops;
   for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
     if (Constant *Op = dyn_cast_or_null<Constant>(MapValue(I->getOperand(i),
-                                                           ValueMap)))
+                                                           ValueMap,
+                                                           Context)))
       Ops.push_back(Op);
     else
       return 0;  // All operands not constant!
 
   if (const CmpInst *CI = dyn_cast<CmpInst>(I))
     return ConstantFoldCompareInstOperands(CI->getPredicate(),
-                                           &Ops[0], Ops.size(), TD);
+                                           &Ops[0], Ops.size(), 
+                                           Context, TD);
 
   if (const LoadInst *LI = dyn_cast<LoadInst>(I))
     if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0]))
@@ -344,7 +349,7 @@ ConstantFoldMappedInstruction(const Instruction *I) {
                                                           CE);
 
   return ConstantFoldInstOperands(I->getOpcode(), I->getType(), &Ops[0],
-                                  Ops.size(), TD);
+                                  Ops.size(), Context, TD);
 }
 
 /// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto,
@@ -356,11 +361,12 @@ ConstantFoldMappedInstruction(const Instruction *I) {
 /// used for things like CloneFunction or CloneModule.
 void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
                                      DenseMap<const Value*, Value*> &ValueMap,
-                                     std::vector<ReturnInst*> &Returns,
+                                     SmallVectorImpl<ReturnInst*> &Returns,
                                      const char *NameSuffix, 
                                      ClonedCodeInfo *CodeInfo,
                                      const TargetData *TD) {
   assert(NameSuffix && "NameSuffix cannot be null!");
+  LLVMContext &Context = OldFunc->getContext();
   
 #ifndef NDEBUG
   for (Function::const_arg_iterator II = OldFunc->arg_begin(), 
@@ -385,7 +391,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
   // insert it into the new function in the right order.  If not, ignore it.
   //
   // Defer PHI resolution until rest of function is resolved.
-  std::vector<const PHINode*> PHIToResolve;
+  SmallVector<const PHINode*, 16> PHIToResolve;
   for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end();
        BI != BE; ++BI) {
     BasicBlock *NewBB = cast_or_null<BasicBlock>(ValueMap[BI]);
@@ -430,7 +436,8 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
       for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) {
         if (BasicBlock *MappedBlock = 
             cast_or_null<BasicBlock>(ValueMap[PN->getIncomingBlock(pred)])) {
-          Value *InVal = MapValue(PN->getIncomingValue(pred), ValueMap);
+          Value *InVal = MapValue(PN->getIncomingValue(pred),
+                                  ValueMap, Context);
           assert(InVal && "Unknown input value?");
           PN->setIncomingValue(pred, InVal);
           PN->setIncomingBlock(pred, MappedBlock);
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp
index 82f5b93..0285f8c 100644
--- a/lib/Transforms/Utils/CloneModule.cpp
+++ b/lib/Transforms/Utils/CloneModule.cpp
@@ -56,10 +56,11 @@ Module *llvm::CloneModule(const Module *M,
   //
   for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
        I != E; ++I) {
-    GlobalVariable *GV = new GlobalVariable(I->getType()->getElementType(),
+    GlobalVariable *GV = new GlobalVariable(*New, 
+                                            I->getType()->getElementType(),
                                             false,
                                             GlobalValue::ExternalLinkage, 0,
-                                            I->getName(), New);
+                                            I->getName());
     GV->setAlignment(I->getAlignment());
     ValueMap[I] = GV;
   }
@@ -88,7 +89,8 @@ Module *llvm::CloneModule(const Module *M,
     GlobalVariable *GV = cast<GlobalVariable>(ValueMap[I]);
     if (I->hasInitializer())
       GV->setInitializer(cast<Constant>(MapValue(I->getInitializer(),
-                                                 ValueMap)));
+                                                 ValueMap,
+                                                 M->getContext())));
     GV->setLinkage(I->getLinkage());
     GV->setThreadLocal(I->isThreadLocal());
     GV->setConstant(I->isConstant());
@@ -106,7 +108,7 @@ Module *llvm::CloneModule(const Module *M,
         ValueMap[J] = DestI++;
       }
 
-      std::vector<ReturnInst*> Returns;  // Ignore returns cloned...
+      SmallVector<ReturnInst*, 8> Returns;  // Ignore returns cloned.
       CloneFunctionInto(F, I, ValueMap, Returns);
     }
 
@@ -119,7 +121,7 @@ Module *llvm::CloneModule(const Module *M,
     GlobalAlias *GA = cast<GlobalAlias>(ValueMap[I]);
     GA->setLinkage(I->getLinkage());
     if (const Constant* C = I->getAliasee())
-      GA->setAliasee(cast<Constant>(MapValue(C, ValueMap)));
+      GA->setAliasee(cast<Constant>(MapValue(C, ValueMap, M->getContext())));
   }
   
   return New;
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index 6d5904e..c39ccf7 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -18,6 +18,7 @@
 #include "llvm/DerivedTypes.h"
 #include "llvm/Instructions.h"
 #include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/Analysis/Dominators.h"
@@ -27,6 +28,8 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/StringExtras.h"
 #include <algorithm>
 #include <set>
@@ -180,8 +183,24 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
 void CodeExtractor::splitReturnBlocks() {
   for (std::set<BasicBlock*>::iterator I = BlocksToExtract.begin(),
          E = BlocksToExtract.end(); I != E; ++I)
-    if (ReturnInst *RI = dyn_cast<ReturnInst>((*I)->getTerminator()))
-      (*I)->splitBasicBlock(RI, (*I)->getName()+".ret");
+    if (ReturnInst *RI = dyn_cast<ReturnInst>((*I)->getTerminator())) {
+      BasicBlock *New = (*I)->splitBasicBlock(RI, (*I)->getName()+".ret");
+      if (DT) {
+        // Old dominates New. New node domiantes all other nodes dominated
+        //by Old.
+        DomTreeNode *OldNode = DT->getNode(*I);
+        SmallVector<DomTreeNode*, 8> Children;
+        for (DomTreeNode::iterator DI = OldNode->begin(), DE = OldNode->end();
+             DI != DE; ++DI) 
+          Children.push_back(*DI);
+
+        DomTreeNode *NewNode = DT->addNewBlock(New, *I);
+
+        for (SmallVector<DomTreeNode*, 8>::iterator I = Children.begin(),
+               E = Children.end(); I != E; ++I) 
+          DT->changeImmediateDominator(*I, NewNode);
+      }
+    }
 }
 
 // findInputsOutputs - Find inputs to, outputs from the code region.
@@ -234,15 +253,15 @@ Function *CodeExtractor::constructFunction(const Values &inputs,
                                            BasicBlock *newHeader,
                                            Function *oldFunction,
                                            Module *M) {
-  DOUT << "inputs: " << inputs.size() << "\n";
-  DOUT << "outputs: " << outputs.size() << "\n";
+  DEBUG(errs() << "inputs: " << inputs.size() << "\n");
+  DEBUG(errs() << "outputs: " << outputs.size() << "\n");
 
   // This function returns unsigned, outputs will go back by reference.
   switch (NumExitBlocks) {
   case 0:
-  case 1: RetTy = Type::VoidTy; break;
-  case 2: RetTy = Type::Int1Ty; break;
-  default: RetTy = Type::Int16Ty; break;
+  case 1: RetTy = Type::getVoidTy(header->getContext()); break;
+  case 2: RetTy = Type::getInt1Ty(header->getContext()); break;
+  default: RetTy = Type::getInt16Ty(header->getContext()); break;
   }
 
   std::vector<const Type*> paramTy;
@@ -251,32 +270,34 @@ Function *CodeExtractor::constructFunction(const Values &inputs,
   for (Values::const_iterator i = inputs.begin(),
          e = inputs.end(); i != e; ++i) {
     const Value *value = *i;
-    DOUT << "value used in func: " << *value << "\n";
+    DEBUG(errs() << "value used in func: " << *value << "\n");
     paramTy.push_back(value->getType());
   }
 
   // Add the types of the output values to the function's argument list.
   for (Values::const_iterator I = outputs.begin(), E = outputs.end();
        I != E; ++I) {
-    DOUT << "instr used in func: " << **I << "\n";
+    DEBUG(errs() << "instr used in func: " << **I << "\n");
     if (AggregateArgs)
       paramTy.push_back((*I)->getType());
     else
       paramTy.push_back(PointerType::getUnqual((*I)->getType()));
   }
 
-  DOUT << "Function type: " << *RetTy << " f(";
+  DEBUG(errs() << "Function type: " << *RetTy << " f(");
   for (std::vector<const Type*>::iterator i = paramTy.begin(),
          e = paramTy.end(); i != e; ++i)
-    DOUT << **i << ", ";
-  DOUT << ")\n";
+    DEBUG(errs() << **i << ", ");
+  DEBUG(errs() << ")\n");
 
   if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
-    PointerType *StructPtr = PointerType::getUnqual(StructType::get(paramTy));
+    PointerType *StructPtr =
+           PointerType::getUnqual(StructType::get(M->getContext(), paramTy));
     paramTy.clear();
     paramTy.push_back(StructPtr);
   }
-  const FunctionType *funcType = FunctionType::get(RetTy, paramTy, false);
+  const FunctionType *funcType =
+                  FunctionType::get(RetTy, paramTy, false);
 
   // Create the new function
   Function *newFunction = Function::Create(funcType,
@@ -298,13 +319,13 @@ Function *CodeExtractor::constructFunction(const Values &inputs,
     Value *RewriteVal;
     if (AggregateArgs) {
       Value *Idx[2];
-      Idx[0] = Constant::getNullValue(Type::Int32Ty);
-      Idx[1] = ConstantInt::get(Type::Int32Ty, i);
-      std::string GEPname = "gep_" + inputs[i]->getName();
+      Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext()));
+      Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i);
       TerminatorInst *TI = newFunction->begin()->getTerminator();
-      GetElementPtrInst *GEP = GetElementPtrInst::Create(AI, Idx, Idx+2, 
-                                                         GEPname, TI);
-      RewriteVal = new LoadInst(GEP, "load" + GEPname, TI);
+      GetElementPtrInst *GEP = 
+        GetElementPtrInst::Create(AI, Idx, Idx+2, 
+                                  "gep_" + inputs[i]->getName(), TI);
+      RewriteVal = new LoadInst(GEP, "loadgep_" + inputs[i]->getName(), TI);
     } else
       RewriteVal = AI++;
 
@@ -340,6 +361,20 @@ Function *CodeExtractor::constructFunction(const Values &inputs,
   return newFunction;
 }
 
+/// FindPhiPredForUseInBlock - Given a value and a basic block, find a PHI
+/// that uses the value within the basic block, and return the predecessor
+/// block associated with that use, or return 0 if none is found.
+static BasicBlock* FindPhiPredForUseInBlock(Value* Used, BasicBlock* BB) {
+  for (Value::use_iterator UI = Used->use_begin(),
+       UE = Used->use_end(); UI != UE; ++UI) {
+     PHINode *P = dyn_cast<PHINode>(*UI);
+     if (P && P->getParent() == BB)
+       return P->getIncomingBlock(UI);
+  }
+  
+  return 0;
+}
+
 /// emitCallAndSwitchStatement - This method sets up the caller side by adding
 /// the call instruction, splitting any PHI nodes in the header block as
 /// necessary.
@@ -348,7 +383,9 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
                            Values &inputs, Values &outputs) {
   // Emit a call to the new function, passing in: *pointer to struct (if
   // aggregating parameters), or plan inputs and allocated memory for outputs
-  std::vector<Value*> params, StructValues, ReloadOutputs;
+  std::vector<Value*> params, StructValues, ReloadOutputs, Reloads;
+  
+  LLVMContext &Context = newFunction->getContext();
 
   // Add inputs as params, or to be filled into the struct
   for (Values::iterator i = inputs.begin(), e = inputs.end(); i != e; ++i)
@@ -378,7 +415,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
       ArgTypes.push_back((*v)->getType());
 
     // Allocate a struct at the beginning of this function
-    Type *StructArgTy = StructType::get(ArgTypes);
+    Type *StructArgTy = StructType::get(newFunction->getContext(), ArgTypes);
     Struct =
       new AllocaInst(StructArgTy, 0, "structArg",
                      codeReplacer->getParent()->begin()->begin());
@@ -386,8 +423,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
 
     for (unsigned i = 0, e = inputs.size(); i != e; ++i) {
       Value *Idx[2];
-      Idx[0] = Constant::getNullValue(Type::Int32Ty);
-      Idx[1] = ConstantInt::get(Type::Int32Ty, i);
+      Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+      Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i);
       GetElementPtrInst *GEP =
         GetElementPtrInst::Create(Struct, Idx, Idx + 2,
                                   "gep_" + StructValues[i]->getName());
@@ -412,8 +449,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
     Value *Output = 0;
     if (AggregateArgs) {
       Value *Idx[2];
-      Idx[0] = Constant::getNullValue(Type::Int32Ty);
-      Idx[1] = ConstantInt::get(Type::Int32Ty, FirstOut + i);
+      Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+      Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i);
       GetElementPtrInst *GEP
         = GetElementPtrInst::Create(Struct, Idx, Idx + 2,
                                     "gep_reload_" + outputs[i]->getName());
@@ -423,6 +460,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
       Output = ReloadOutputs[i];
     }
     LoadInst *load = new LoadInst(Output, outputs[i]->getName()+".reload");
+    Reloads.push_back(load);
     codeReplacer->getInstList().push_back(load);
     std::vector<User*> Users(outputs[i]->use_begin(), outputs[i]->use_end());
     for (unsigned u = 0, e = Users.size(); u != e; ++u) {
@@ -434,7 +472,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
 
   // Now we can emit a switch statement using the call as a value.
   SwitchInst *TheSwitch =
-      SwitchInst::Create(ConstantInt::getNullValue(Type::Int16Ty),
+      SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)),
                          codeReplacer, 0, codeReplacer);
 
   // Since there may be multiple exits from the original region, make the new
@@ -456,7 +494,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
         if (!NewTarget) {
           // If we don't already have an exit stub for this non-extracted
           // destination, create one now!
-          NewTarget = BasicBlock::Create(OldTarget->getName() + ".exitStub",
+          NewTarget = BasicBlock::Create(Context,
+                                         OldTarget->getName() + ".exitStub",
                                          newFunction);
           unsigned SuccNum = switchVal++;
 
@@ -465,17 +504,18 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
           case 0:
           case 1: break;  // No value needed.
           case 2:         // Conditional branch, return a bool
-            brVal = ConstantInt::get(Type::Int1Ty, !SuccNum);
+            brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum);
             break;
           default:
-            brVal = ConstantInt::get(Type::Int16Ty, SuccNum);
+            brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum);
             break;
           }
 
-          ReturnInst *NTRet = ReturnInst::Create(brVal, NewTarget);
+          ReturnInst *NTRet = ReturnInst::Create(Context, brVal, NewTarget);
 
           // Update the switch instruction.
-          TheSwitch->addCase(ConstantInt::get(Type::Int16Ty, SuccNum),
+          TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context),
+                                              SuccNum),
                              OldTarget);
 
           // Restore values just before we exit
@@ -507,14 +547,25 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
                 DominatesDef = false;
             }
 
-            if (DT)
+            if (DT) {
               DominatesDef = DT->dominates(DefBlock, OldTarget);
+              
+              // If the output value is used by a phi in the target block,
+              // then we need to test for dominance of the phi's predecessor
+              // instead.  Unfortunately, this a little complicated since we
+              // have already rewritten uses of the value to uses of the reload.
+              BasicBlock* pred = FindPhiPredForUseInBlock(Reloads[out], 
+                                                          OldTarget);
+              if (pred && DT && DT->dominates(DefBlock, pred))
+                DominatesDef = true;
+            }
 
             if (DominatesDef) {
               if (AggregateArgs) {
                 Value *Idx[2];
-                Idx[0] = Constant::getNullValue(Type::Int32Ty);
-                Idx[1] = ConstantInt::get(Type::Int32Ty,FirstOut+out);
+                Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+                Idx[1] = ConstantInt::get(Type::getInt32Ty(Context),
+                                          FirstOut+out);
                 GetElementPtrInst *GEP =
                   GetElementPtrInst::Create(OAI, Idx, Idx + 2,
                                             "gep_" + outputs[out]->getName(),
@@ -543,15 +594,16 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
     // this should be rewritten as a `ret'
 
     // Check if the function should return a value
-    if (OldFnRetTy == Type::VoidTy) {
-      ReturnInst::Create(0, TheSwitch);  // Return void
+    if (OldFnRetTy == Type::getVoidTy(Context)) {
+      ReturnInst::Create(Context, 0, TheSwitch);  // Return void
     } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) {
       // return what we have
-      ReturnInst::Create(TheSwitch->getCondition(), TheSwitch);
+      ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch);
     } else {
       // Otherwise we must have code extracted an unwind or something, just
       // return whatever we want.
-      ReturnInst::Create(Constant::getNullValue(OldFnRetTy), TheSwitch);
+      ReturnInst::Create(Context, 
+                         Constant::getNullValue(OldFnRetTy), TheSwitch);
     }
 
     TheSwitch->eraseFromParent();
@@ -644,12 +696,14 @@ ExtractCodeRegion(const std::vector<BasicBlock*> &code) {
   Function *oldFunction = header->getParent();
 
   // This takes place of the original loop
-  BasicBlock *codeReplacer = BasicBlock::Create("codeRepl", oldFunction,
+  BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(), 
+                                                "codeRepl", oldFunction,
                                                 header);
 
   // The new function needs a root node because other nodes can branch to the
   // head of the region, but the entry node of a function cannot have preds.
-  BasicBlock *newFuncRoot = BasicBlock::Create("newFuncRoot");
+  BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(), 
+                                               "newFuncRoot");
   newFuncRoot->getInstList().push_back(BranchInst::Create(header));
 
   // Find inputs to, outputs from the code region.
@@ -702,7 +756,8 @@ ExtractCodeRegion(const std::vector<BasicBlock*> &code) {
   //  cerr << "OLD FUNCTION: " << *oldFunction;
   //  verifyFunction(*oldFunction);
 
-  DEBUG(if (verifyFunction(*newFunction)) abort());
+  DEBUG(if (verifyFunction(*newFunction)) 
+        llvm_report_error("verifyFunction failed!"));
   return newFunction;
 }
 
diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp
index b8dd754..c908b4a 100644
--- a/lib/Transforms/Utils/DemoteRegToStack.cpp
+++ b/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -39,7 +39,8 @@ AllocaInst* llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
   // Create a stack slot to hold the value.
   AllocaInst *Slot;
   if (AllocaPoint) {
-    Slot = new AllocaInst(I.getType(), 0, I.getName()+".reg2mem", AllocaPoint);
+    Slot = new AllocaInst(I.getType(), 0,
+                          I.getName()+".reg2mem", AllocaPoint);
   } else {
     Function *F = I.getParent()->getParent();
     Slot = new AllocaInst(I.getType(), 0, I.getName()+".reg2mem",
@@ -116,7 +117,8 @@ AllocaInst* llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) {
   // Create a stack slot to hold the value.
   AllocaInst *Slot;
   if (AllocaPoint) {
-    Slot = new AllocaInst(P->getType(), 0, P->getName()+".reg2mem", AllocaPoint);
+    Slot = new AllocaInst(P->getType(), 0,
+                          P->getName()+".reg2mem", AllocaPoint);
   } else {
     Function *F = P->getParent()->getParent();
     Slot = new AllocaInst(P->getType(), 0, P->getName()+".reg2mem",
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 4989c00..0d00d69 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -15,6 +15,7 @@
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
@@ -28,13 +29,73 @@
 #include "llvm/Support/CallSite.h"
 using namespace llvm;
 
-bool llvm::InlineFunction(CallInst *CI, CallGraph *CG, const TargetData *TD) {
-  return InlineFunction(CallSite(CI), CG, TD);
+bool llvm::InlineFunction(CallInst *CI, CallGraph *CG, const TargetData *TD,
+                          SmallVectorImpl<AllocaInst*> *StaticAllocas) {
+  return InlineFunction(CallSite(CI), CG, TD, StaticAllocas);
 }
-bool llvm::InlineFunction(InvokeInst *II, CallGraph *CG, const TargetData *TD) {
-  return InlineFunction(CallSite(II), CG, TD);
+bool llvm::InlineFunction(InvokeInst *II, CallGraph *CG, const TargetData *TD,
+                          SmallVectorImpl<AllocaInst*> *StaticAllocas) {
+  return InlineFunction(CallSite(II), CG, TD, StaticAllocas);
 }
 
+
+/// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into
+/// an invoke, we have to turn all of the calls that can throw into
+/// invokes.  This function analyze BB to see if there are any calls, and if so,
+/// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI
+/// nodes in that block with the values specified in InvokeDestPHIValues.
+///
+static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
+                                                   BasicBlock *InvokeDest,
+                           const SmallVectorImpl<Value*> &InvokeDestPHIValues) {
+  for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
+    Instruction *I = BBI++;
+    
+    // We only need to check for function calls: inlined invoke
+    // instructions require no special handling.
+    CallInst *CI = dyn_cast<CallInst>(I);
+    if (CI == 0) continue;
+    
+    // If this call cannot unwind, don't convert it to an invoke.
+    if (CI->doesNotThrow())
+      continue;
+    
+    // Convert this function call into an invoke instruction.
+    // First, split the basic block.
+    BasicBlock *Split = BB->splitBasicBlock(CI, CI->getName()+".noexc");
+    
+    // Next, create the new invoke instruction, inserting it at the end
+    // of the old basic block.
+    SmallVector<Value*, 8> InvokeArgs(CI->op_begin()+1, CI->op_end());
+    InvokeInst *II =
+      InvokeInst::Create(CI->getCalledValue(), Split, InvokeDest,
+                         InvokeArgs.begin(), InvokeArgs.end(),
+                         CI->getName(), BB->getTerminator());
+    II->setCallingConv(CI->getCallingConv());
+    II->setAttributes(CI->getAttributes());
+    
+    // Make sure that anything using the call now uses the invoke!  This also
+    // updates the CallGraph if present.
+    CI->replaceAllUsesWith(II);
+    
+    // Delete the unconditional branch inserted by splitBasicBlock
+    BB->getInstList().pop_back();
+    Split->getInstList().pop_front();  // Delete the original call
+    
+    // Update any PHI nodes in the exceptional block to indicate that
+    // there is now a new entry in them.
+    unsigned i = 0;
+    for (BasicBlock::iterator I = InvokeDest->begin();
+         isa<PHINode>(I); ++I, ++i)
+      cast<PHINode>(I)->addIncoming(InvokeDestPHIValues[i], BB);
+    
+    // This basic block is now complete, the caller will continue scanning the
+    // next one.
+    return;
+  }
+}
+  
+
 /// HandleInlinedInvoke - If we inlined an invoke site, we need to convert calls
 /// in the body of the inlined function into invokes and turn unwind
 /// instructions into branches to the invoke unwind dest.
@@ -43,10 +104,9 @@ bool llvm::InlineFunction(InvokeInst *II, CallGraph *CG, const TargetData *TD) {
 /// block of the inlined code (the last block is the end of the function),
 /// and InlineCodeInfo is information about the code that got inlined.
 static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
-                                ClonedCodeInfo &InlinedCodeInfo,
-                                CallGraph *CG) {
+                                ClonedCodeInfo &InlinedCodeInfo) {
   BasicBlock *InvokeDest = II->getUnwindDest();
-  std::vector<Value*> InvokeDestPHIValues;
+  SmallVector<Value*, 8> InvokeDestPHIValues;
 
   // If there are PHI nodes in the unwind destination block, we need to
   // keep track of which values came into them from this invoke, then remove
@@ -62,92 +122,39 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
 
   // The inlined code is currently at the end of the function, scan from the
   // start of the inlined code to its end, checking for stuff we need to
-  // rewrite.
-  if (InlinedCodeInfo.ContainsCalls || InlinedCodeInfo.ContainsUnwinds) {
-    for (Function::iterator BB = FirstNewBlock, E = Caller->end();
-         BB != E; ++BB) {
-      if (InlinedCodeInfo.ContainsCalls) {
-        for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ){
-          Instruction *I = BBI++;
-
-          // We only need to check for function calls: inlined invoke
-          // instructions require no special handling.
-          if (!isa<CallInst>(I)) continue;
-          CallInst *CI = cast<CallInst>(I);
-
-          // If this call cannot unwind, don't convert it to an invoke.
-          if (CI->doesNotThrow())
-            continue;
-
-          // Convert this function call into an invoke instruction.
-          // First, split the basic block.
-          BasicBlock *Split = BB->splitBasicBlock(CI, CI->getName()+".noexc");
-
-          // Next, create the new invoke instruction, inserting it at the end
-          // of the old basic block.
-          SmallVector<Value*, 8> InvokeArgs(CI->op_begin()+1, CI->op_end());
-          InvokeInst *II =
-            InvokeInst::Create(CI->getCalledValue(), Split, InvokeDest,
-                               InvokeArgs.begin(), InvokeArgs.end(),
-                               CI->getName(), BB->getTerminator());
-          II->setCallingConv(CI->getCallingConv());
-          II->setAttributes(CI->getAttributes());
-
-          // Make sure that anything using the call now uses the invoke!
-          CI->replaceAllUsesWith(II);
-
-          // Update the callgraph.
-          if (CG) {
-            // We should be able to do this:
-            //   (*CG)[Caller]->replaceCallSite(CI, II);
-            // but that fails if the old call site isn't in the call graph,
-            // which, because of LLVM bug 3601, it sometimes isn't.
-            CallGraphNode *CGN = (*CG)[Caller];
-            for (CallGraphNode::iterator NI = CGN->begin(), NE = CGN->end();
-                 NI != NE; ++NI) {
-              if (NI->first == CI) {
-                NI->first = II;
-                break;
-              }
-            }
-          }
-
-          // Delete the unconditional branch inserted by splitBasicBlock
-          BB->getInstList().pop_back();
-          Split->getInstList().pop_front();  // Delete the original call
-
-          // Update any PHI nodes in the exceptional block to indicate that
-          // there is now a new entry in them.
-          unsigned i = 0;
-          for (BasicBlock::iterator I = InvokeDest->begin();
-               isa<PHINode>(I); ++I, ++i) {
-            PHINode *PN = cast<PHINode>(I);
-            PN->addIncoming(InvokeDestPHIValues[i], BB);
-          }
-
-          // This basic block is now complete, start scanning the next one.
-          break;
-        }
-      }
-
-      if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
-        // An UnwindInst requires special handling when it gets inlined into an
-        // invoke site.  Once this happens, we know that the unwind would cause
-        // a control transfer to the invoke exception destination, so we can
-        // transform it into a direct branch to the exception destination.
-        BranchInst::Create(InvokeDest, UI);
-
-        // Delete the unwind instruction!
-        UI->eraseFromParent();
-
-        // Update any PHI nodes in the exceptional block to indicate that
-        // there is now a new entry in them.
-        unsigned i = 0;
-        for (BasicBlock::iterator I = InvokeDest->begin();
-             isa<PHINode>(I); ++I, ++i) {
-          PHINode *PN = cast<PHINode>(I);
-          PN->addIncoming(InvokeDestPHIValues[i], BB);
-        }
+  // rewrite.  If the code doesn't have calls or unwinds, we know there is
+  // nothing to rewrite.
+  if (!InlinedCodeInfo.ContainsCalls && !InlinedCodeInfo.ContainsUnwinds) {
+    // Now that everything is happy, we have one final detail.  The PHI nodes in
+    // the exception destination block still have entries due to the original
+    // invoke instruction.  Eliminate these entries (which might even delete the
+    // PHI node) now.
+    InvokeDest->removePredecessor(II->getParent());
+    return;
+  }
+  
+  for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB){
+    if (InlinedCodeInfo.ContainsCalls)
+      HandleCallsInBlockInlinedThroughInvoke(BB, InvokeDest,
+                                             InvokeDestPHIValues);
+
+    if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
+      // An UnwindInst requires special handling when it gets inlined into an
+      // invoke site.  Once this happens, we know that the unwind would cause
+      // a control transfer to the invoke exception destination, so we can
+      // transform it into a direct branch to the exception destination.
+      BranchInst::Create(InvokeDest, UI);
+
+      // Delete the unwind instruction!
+      UI->eraseFromParent();
+
+      // Update any PHI nodes in the exceptional block to indicate that
+      // there is now a new entry in them.
+      unsigned i = 0;
+      for (BasicBlock::iterator I = InvokeDest->begin();
+           isa<PHINode>(I); ++I, ++i) {
+        PHINode *PN = cast<PHINode>(I);
+        PN->addIncoming(InvokeDestPHIValues[i], BB);
       }
     }
   }
@@ -185,17 +192,19 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
   }
 
   for (; I != E; ++I) {
-    const Instruction *OrigCall = I->first.getInstruction();
+    const Value *OrigCall = I->first;
 
     DenseMap<const Value*, Value*>::iterator VMI = ValueMap.find(OrigCall);
     // Only copy the edge if the call was inlined!
-    if (VMI != ValueMap.end() && VMI->second) {
-      // If the call was inlined, but then constant folded, there is no edge to
-      // add.  Check for this case.
-      if (Instruction *NewCall = dyn_cast<Instruction>(VMI->second))
-        CallerNode->addCalledFunction(CallSite::get(NewCall), I->second);
-    }
+    if (VMI == ValueMap.end() || VMI->second == 0)
+      continue;
+    
+    // If the call was inlined, but then constant folded, there is no edge to
+    // add.  Check for this case.
+    if (Instruction *NewCall = dyn_cast<Instruction>(VMI->second))
+      CallerNode->addCalledFunction(CallSite::get(NewCall), I->second);
   }
+  
   // Update the call graph by deleting the edge from Callee to Caller.  We must
   // do this after the loop above in case Caller and Callee are the same.
   CallerNode->removeCallEdgeFor(CS);
@@ -204,25 +213,27 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
 /// findFnRegionEndMarker - This is a utility routine that is used by
 /// InlineFunction. Return llvm.dbg.region.end intrinsic that corresponds
 /// to the llvm.dbg.func.start of the function F. Otherwise return NULL.
+///
 static const DbgRegionEndInst *findFnRegionEndMarker(const Function *F) {
 
-  GlobalVariable *FnStart = NULL;
+  MDNode *FnStart = NULL;
   const DbgRegionEndInst *FnEnd = NULL;
   for (Function::const_iterator FI = F->begin(), FE =F->end(); FI != FE; ++FI) 
     for (BasicBlock::const_iterator BI = FI->begin(), BE = FI->end(); BI != BE;
          ++BI) {
       if (FnStart == NULL)  {
         if (const DbgFuncStartInst *FSI = dyn_cast<DbgFuncStartInst>(BI)) {
-          DISubprogram SP(cast<GlobalVariable>(FSI->getSubprogram()));
+          DISubprogram SP(FSI->getSubprogram());
           assert (SP.isNull() == false && "Invalid llvm.dbg.func.start");
           if (SP.describes(F))
-            FnStart = SP.getGV();
+            FnStart = SP.getNode();
         }
-      } else {
-        if (const DbgRegionEndInst *REI = dyn_cast<DbgRegionEndInst>(BI))
-          if (REI->getContext() == FnStart)
-            FnEnd = REI;
+        continue;
       }
+      
+      if (const DbgRegionEndInst *REI = dyn_cast<DbgRegionEndInst>(BI))
+        if (REI->getContext() == FnStart)
+          FnEnd = REI;
     }
   return FnEnd;
 }
@@ -236,8 +247,10 @@ static const DbgRegionEndInst *findFnRegionEndMarker(const Function *F) {
 // exists in the instruction stream.  Similiarly this will inline a recursive
 // function by one level.
 //
-bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) {
+bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD,
+                          SmallVectorImpl<AllocaInst*> *StaticAllocas) {
   Instruction *TheCall = CS.getInstruction();
+  LLVMContext &Context = TheCall->getContext();
   assert(TheCall->getParent() && TheCall->getParent()->getParent() &&
          "Instruction not in function!");
 
@@ -277,7 +290,7 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) {
 
   // Make sure to capture all of the return instructions from the cloned
   // function.
-  std::vector<ReturnInst*> Returns;
+  SmallVector<ReturnInst*, 8> Returns;
   ClonedCodeInfo InlinedFunctionInfo;
   Function::iterator FirstNewBlock;
 
@@ -302,15 +315,17 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) {
       if (CalledFunc->paramHasAttr(ArgNo+1, Attribute::ByVal) &&
           !CalledFunc->onlyReadsMemory()) {
         const Type *AggTy = cast<PointerType>(I->getType())->getElementType();
-        const Type *VoidPtrTy = PointerType::getUnqual(Type::Int8Ty);
+        const Type *VoidPtrTy = 
+            Type::getInt8PtrTy(Context);
 
         // Create the alloca.  If we have TargetData, use nice alignment.
         unsigned Align = 1;
         if (TD) Align = TD->getPrefTypeAlignment(AggTy);
-        Value *NewAlloca = new AllocaInst(AggTy, 0, Align, I->getName(),
-                                          Caller->begin()->begin());
+        Value *NewAlloca = new AllocaInst(AggTy, 0, Align, 
+                                          I->getName(), 
+                                          &*Caller->begin()->begin());
         // Emit a memcpy.
-        const Type *Tys[] = { Type::Int64Ty };
+        const Type *Tys[] = { Type::getInt64Ty(Context) };
         Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(),
                                                        Intrinsic::memcpy, 
                                                        Tys, 1);
@@ -321,13 +336,15 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) {
         if (TD == 0)
           Size = ConstantExpr::getSizeOf(AggTy);
         else
-          Size = ConstantInt::get(Type::Int64Ty, TD->getTypeStoreSize(AggTy));
+          Size = ConstantInt::get(Type::getInt64Ty(Context),
+                                         TD->getTypeStoreSize(AggTy));
 
         // Always generate a memcpy of alignment 1 here because we don't know
         // the alignment of the src pointer.  Other optimizations can infer
         // better alignment.
         Value *CallArgs[] = {
-          DestCast, SrcCast, Size, ConstantInt::get(Type::Int32Ty, 1)
+          DestCast, SrcCast, Size,
+          ConstantInt::get(Type::getInt32Ty(Context), 1)
         };
         CallInst *TheMemCpy =
           CallInst::Create(MemCpyFn, CallArgs, CallArgs+4, "", TheCall);
@@ -352,13 +369,12 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) {
     // call site. The function body cloner does not clone original
     // region end marker from the CalledFunc. This will ensure that
     // inlined function's scope ends at the right place. 
-    const DbgRegionEndInst *DREI = findFnRegionEndMarker(CalledFunc);
-    if (DREI) {
-      for (BasicBlock::iterator BI = TheCall, 
-             BE = TheCall->getParent()->end(); BI != BE; ++BI) {
+    if (const DbgRegionEndInst *DREI = findFnRegionEndMarker(CalledFunc)) {
+      for (BasicBlock::iterator BI = TheCall, BE = TheCall->getParent()->end();
+           BI != BE; ++BI) {
         if (DbgStopPointInst *DSPI = dyn_cast<DbgStopPointInst>(BI)) {
           if (DbgRegionEndInst *NewDREI = 
-              dyn_cast<DbgRegionEndInst>(DREI->clone()))
+                dyn_cast<DbgRegionEndInst>(DREI->clone()))
             NewDREI->insertAfter(DSPI);
           break;
         }
@@ -388,31 +404,39 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) {
   {
     BasicBlock::iterator InsertPoint = Caller->begin()->begin();
     for (BasicBlock::iterator I = FirstNewBlock->begin(),
-           E = FirstNewBlock->end(); I != E; )
-      if (AllocaInst *AI = dyn_cast<AllocaInst>(I++)) {
-        // If the alloca is now dead, remove it.  This often occurs due to code
-        // specialization.
-        if (AI->use_empty()) {
-          AI->eraseFromParent();
-          continue;
-        }
+         E = FirstNewBlock->end(); I != E; ) {
+      AllocaInst *AI = dyn_cast<AllocaInst>(I++);
+      if (AI == 0) continue;
+      
+      // If the alloca is now dead, remove it.  This often occurs due to code
+      // specialization.
+      if (AI->use_empty()) {
+        AI->eraseFromParent();
+        continue;
+      }
 
-        if (isa<Constant>(AI->getArraySize())) {
-          // Scan for the block of allocas that we can move over, and move them
-          // all at once.
-          while (isa<AllocaInst>(I) &&
-                 isa<Constant>(cast<AllocaInst>(I)->getArraySize()))
-            ++I;
-
-          // Transfer all of the allocas over in a block.  Using splice means
-          // that the instructions aren't removed from the symbol table, then
-          // reinserted.
-          Caller->getEntryBlock().getInstList().splice(
-              InsertPoint,
-              FirstNewBlock->getInstList(),
-              AI, I);
-        }
+      if (!isa<Constant>(AI->getArraySize()))
+        continue;
+      
+      // Keep track of the static allocas that we inline into the caller if the
+      // StaticAllocas pointer is non-null.
+      if (StaticAllocas) StaticAllocas->push_back(AI);
+      
+      // Scan for the block of allocas that we can move over, and move them
+      // all at once.
+      while (isa<AllocaInst>(I) &&
+             isa<Constant>(cast<AllocaInst>(I)->getArraySize())) {
+        if (StaticAllocas) StaticAllocas->push_back(cast<AllocaInst>(I));
+        ++I;
       }
+
+      // Transfer all of the allocas over in a block.  Using splice means
+      // that the instructions aren't removed from the symbol table, then
+      // reinserted.
+      Caller->getEntryBlock().getInstList().splice(InsertPoint,
+                                                   FirstNewBlock->getInstList(),
+                                                   AI, I);
+    }
   }
 
   // If the inlined code contained dynamic alloca instructions, wrap the inlined
@@ -486,7 +510,7 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) {
          BB != E; ++BB) {
       TerminatorInst *Term = BB->getTerminator();
       if (isa<UnwindInst>(Term)) {
-        new UnreachableInst(Term);
+        new UnreachableInst(Context, Term);
         BB->getInstList().erase(Term);
       }
     }
@@ -495,7 +519,7 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) {
   // any inlined 'unwind' instructions into branches to the invoke exception
   // destination, and call instructions into invoke instructions.
   if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
-    HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo, CG);
+    HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo);
 
   // If we cloned in _exactly one_ basic block, and if that block ends in a
   // return instruction, we splice the body of the inlined callee directly into
diff --git a/lib/Transforms/Utils/InstructionNamer.cpp b/lib/Transforms/Utils/InstructionNamer.cpp
index 4f8a160..1fa51a3 100644
--- a/lib/Transforms/Utils/InstructionNamer.cpp
+++ b/lib/Transforms/Utils/InstructionNamer.cpp
@@ -32,7 +32,7 @@ namespace {
     bool runOnFunction(Function &F) {
       for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end();
            AI != AE; ++AI)
-        if (!AI->hasName() && AI->getType() != Type::VoidTy)
+        if (!AI->hasName() && AI->getType() != Type::getVoidTy(F.getContext()))
           AI->setName("tmp");
 
       for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
@@ -40,7 +40,7 @@ namespace {
           BB->setName("BB");
         
         for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
-          if (!I->hasName() && I->getType() != Type::VoidTy)
+          if (!I->hasName() && I->getType() != Type::getVoidTy(F.getContext()))
             I->setName("tmp");
       }
       return true;
diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp
index d5e7303..56e662e 100644
--- a/lib/Transforms/Utils/LCSSA.cpp
+++ b/lib/Transforms/Utils/LCSSA.cpp
@@ -33,22 +33,19 @@
 #include "llvm/Pass.h"
 #include "llvm/Function.h"
 #include "llvm/Instructions.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/Compiler.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/PredIteratorCache.h"
-#include <algorithm>
-#include <map>
 using namespace llvm;
 
 STATISTIC(NumLCSSA, "Number of live out of a loop variables");
 
 namespace {
-  struct VISIBILITY_HIDDEN LCSSA : public LoopPass {
+  struct LCSSA : public LoopPass {
     static char ID; // Pass identification, replacement for typeid
     LCSSA() : LoopPass(&ID) {}
 
@@ -57,12 +54,10 @@ namespace {
     DominatorTree *DT;
     std::vector<BasicBlock*> LoopBlocks;
     PredIteratorCache PredCache;
+    Loop *L;
     
     virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
 
-    void ProcessInstruction(Instruction* Instr,
-                            const SmallVector<BasicBlock*, 8>& exitBlocks);
-    
     /// This transformation requires natural loop information & requires that
     /// loop preheaders be inserted into the CFG.  It maintains both of these,
     /// as well as the CFG.  It also requires dominator information.
@@ -71,9 +66,9 @@ namespace {
       AU.setPreservesCFG();
       AU.addRequiredID(LoopSimplifyID);
       AU.addPreservedID(LoopSimplifyID);
-      AU.addRequired<LoopInfo>();
+      AU.addRequiredTransitive<LoopInfo>();
       AU.addPreserved<LoopInfo>();
-      AU.addRequired<DominatorTree>();
+      AU.addRequiredTransitive<DominatorTree>();
       AU.addPreserved<ScalarEvolution>();
       AU.addPreserved<DominatorTree>();
 
@@ -85,15 +80,17 @@ namespace {
       AU.addPreserved<DominanceFrontier>();
     }
   private:
-    void getLoopValuesUsedOutsideLoop(Loop *L,
-                                      SetVector<Instruction*> &AffectedValues,
-                                 const SmallVector<BasicBlock*, 8>& exitBlocks);
-
-    Value *GetValueForBlock(DomTreeNode *BB, Instruction *OrigInst,
-                            DenseMap<DomTreeNode*, Value*> &Phis);
+    bool ProcessInstruction(Instruction *Inst,
+                            const SmallVectorImpl<BasicBlock*> &ExitBlocks);
+    
+    /// verifyAnalysis() - Verify loop nest.
+    virtual void verifyAnalysis() const {
+      // Check the special guarantees that LCSSA makes.
+      assert(L->isLCSSAForm() && "LCSSA form not preserved!");
+    }
 
     /// inLoop - returns true if the given block is within the current loop
-    bool inLoop(BasicBlock* B) {
+    bool inLoop(BasicBlock *B) const {
       return std::binary_search(LoopBlocks.begin(), LoopBlocks.end(), B);
     }
   };
@@ -105,181 +102,163 @@ static RegisterPass<LCSSA> X("lcssa", "Loop-Closed SSA Form Pass");
 Pass *llvm::createLCSSAPass() { return new LCSSA(); }
 const PassInfo *const llvm::LCSSAID = &X;
 
+
+/// BlockDominatesAnExit - Return true if the specified block dominates at least
+/// one of the blocks in the specified list.
+static bool BlockDominatesAnExit(BasicBlock *BB,
+                                 const SmallVectorImpl<BasicBlock*> &ExitBlocks,
+                                 DominatorTree *DT) {
+  DomTreeNode *DomNode = DT->getNode(BB);
+  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+    if (DT->dominates(DomNode, DT->getNode(ExitBlocks[i])))
+      return true;
+
+  return false;
+}
+
+
 /// runOnFunction - Process all loops in the function, inner-most out.
-bool LCSSA::runOnLoop(Loop *L, LPPassManager &LPM) {
-  PredCache.clear();
+bool LCSSA::runOnLoop(Loop *TheLoop, LPPassManager &LPM) {
+  L = TheLoop;
   
   LI = &LPM.getAnalysis<LoopInfo>();
   DT = &getAnalysis<DominatorTree>();
 
-  // Speed up queries by creating a sorted list of blocks
+  // Get the set of exiting blocks.
+  SmallVector<BasicBlock*, 8> ExitBlocks;
+  L->getExitBlocks(ExitBlocks);
+  
+  if (ExitBlocks.empty())
+    return false;
+  
+  // Speed up queries by creating a sorted vector of blocks.
   LoopBlocks.clear();
   LoopBlocks.insert(LoopBlocks.end(), L->block_begin(), L->block_end());
-  std::sort(LoopBlocks.begin(), LoopBlocks.end());
+  array_pod_sort(LoopBlocks.begin(), LoopBlocks.end());
   
-  SmallVector<BasicBlock*, 8> exitBlocks;
-  L->getExitBlocks(exitBlocks);
+  // Look at all the instructions in the loop, checking to see if they have uses
+  // outside the loop.  If so, rewrite those uses.
+  bool MadeChange = false;
   
-  SetVector<Instruction*> AffectedValues;
-  getLoopValuesUsedOutsideLoop(L, AffectedValues, exitBlocks);
+  for (Loop::block_iterator BBI = L->block_begin(), E = L->block_end();
+       BBI != E; ++BBI) {
+    BasicBlock *BB = *BBI;
+    
+    // For large loops, avoid use-scanning by using dominance information:  In
+    // particular, if a block does not dominate any of the loop exits, then none
+    // of the values defined in the block could be used outside the loop.
+    if (!BlockDominatesAnExit(BB, ExitBlocks, DT))
+      continue;
+    
+    for (BasicBlock::iterator I = BB->begin(), E = BB->end();
+         I != E; ++I) {
+      // Reject two common cases fast: instructions with no uses (like stores)
+      // and instructions with one use that is in the same block as this.
+      if (I->use_empty() ||
+          (I->hasOneUse() && I->use_back()->getParent() == BB &&
+           !isa<PHINode>(I->use_back())))
+        continue;
+      
+      MadeChange |= ProcessInstruction(I, ExitBlocks);
+    }
+  }
   
-  // If no values are affected, we can save a lot of work, since we know that
-  // nothing will be changed.
-  if (AffectedValues.empty())
-    return false;
+  assert(L->isLCSSAForm());
+  PredCache.clear();
+
+  return MadeChange;
+}
+
+/// isExitBlock - Return true if the specified block is in the list.
+static bool isExitBlock(BasicBlock *BB,
+                        const SmallVectorImpl<BasicBlock*> &ExitBlocks) {
+  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+    if (ExitBlocks[i] == BB)
+      return true;
+  return false;
+}
+
+/// ProcessInstruction - Given an instruction in the loop, check to see if it
+/// has any uses that are outside the current loop.  If so, insert LCSSA PHI
+/// nodes and rewrite the uses.
+bool LCSSA::ProcessInstruction(Instruction *Inst,
+                               const SmallVectorImpl<BasicBlock*> &ExitBlocks) {
+  SmallVector<Use*, 16> UsesToRewrite;
   
-  // Iterate over all affected values for this loop and insert Phi nodes
-  // for them in the appropriate exit blocks
+  BasicBlock *InstBB = Inst->getParent();
   
-  for (SetVector<Instruction*>::iterator I = AffectedValues.begin(),
-       E = AffectedValues.end(); I != E; ++I)
-    ProcessInstruction(*I, exitBlocks);
+  for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end();
+       UI != E; ++UI) {
+    BasicBlock *UserBB = cast<Instruction>(*UI)->getParent();
+    if (PHINode *PN = dyn_cast<PHINode>(*UI))
+      UserBB = PN->getIncomingBlock(UI);
+    
+    if (InstBB != UserBB && !inLoop(UserBB))
+      UsesToRewrite.push_back(&UI.getUse());
+  }
   
-  assert(L->isLCSSAForm());
+  // If there are no uses outside the loop, exit with no change.
+  if (UsesToRewrite.empty()) return false;
   
-  return true;
-}
-
-/// processInstruction - Given a live-out instruction, insert LCSSA Phi nodes,
-/// eliminate all out-of-loop uses.
-void LCSSA::ProcessInstruction(Instruction *Instr,
-                               const SmallVector<BasicBlock*, 8>& exitBlocks) {
   ++NumLCSSA; // We are applying the transformation
 
-  // Keep track of the blocks that have the value available already.
-  DenseMap<DomTreeNode*, Value*> Phis;
-
-  BasicBlock *DomBB = Instr->getParent();
-
   // Invoke instructions are special in that their result value is not available
   // along their unwind edge. The code below tests to see whether DomBB dominates
   // the value, so adjust DomBB to the normal destination block, which is
   // effectively where the value is first usable.
-  if (InvokeInst *Inv = dyn_cast<InvokeInst>(Instr))
+  BasicBlock *DomBB = Inst->getParent();
+  if (InvokeInst *Inv = dyn_cast<InvokeInst>(Inst))
     DomBB = Inv->getNormalDest();
 
   DomTreeNode *DomNode = DT->getNode(DomBB);
 
-  // Insert the LCSSA phi's into the exit blocks (dominated by the value), and
-  // add them to the Phi's map.
-  for (SmallVector<BasicBlock*, 8>::const_iterator BBI = exitBlocks.begin(),
-      BBE = exitBlocks.end(); BBI != BBE; ++BBI) {
-    BasicBlock *BB = *BBI;
-    DomTreeNode *ExitBBNode = DT->getNode(BB);
-    Value *&Phi = Phis[ExitBBNode];
-    if (!Phi && DT->dominates(DomNode, ExitBBNode)) {
-      PHINode *PN = PHINode::Create(Instr->getType(), Instr->getName()+".lcssa",
-                                    BB->begin());
-      PN->reserveOperandSpace(PredCache.GetNumPreds(BB));
-
-      // Remember that this phi makes the value alive in this block.
-      Phi = PN;
-
-      // Add inputs from inside the loop for this PHI.
-      for (BasicBlock** PI = PredCache.GetPreds(BB); *PI; ++PI)
-        PN->addIncoming(Instr, *PI);
-    }
-  }
+  SSAUpdater SSAUpdate;
+  SSAUpdate.Initialize(Inst);
   
-  
-  // Record all uses of Instr outside the loop.  We need to rewrite these.  The
-  // LCSSA phis won't be included because they use the value in the loop.
-  for (Value::use_iterator UI = Instr->use_begin(), E = Instr->use_end();
-       UI != E;) {
-    BasicBlock *UserBB = cast<Instruction>(*UI)->getParent();
-    if (PHINode *P = dyn_cast<PHINode>(*UI)) {
-      UserBB = P->getIncomingBlock(UI);
-    }
+  // Insert the LCSSA phi's into all of the exit blocks dominated by the
+  // value., and add them to the Phi's map.
+  for (SmallVectorImpl<BasicBlock*>::const_iterator BBI = ExitBlocks.begin(),
+      BBE = ExitBlocks.end(); BBI != BBE; ++BBI) {
+    BasicBlock *ExitBB = *BBI;
+    if (!DT->dominates(DomNode, DT->getNode(ExitBB))) continue;
     
-    // If the user is in the loop, don't rewrite it!
-    if (UserBB == Instr->getParent() || inLoop(UserBB)) {
-      ++UI;
-      continue;
-    }
+    // If we already inserted something for this BB, don't reprocess it.
+    if (SSAUpdate.HasValueForBlock(ExitBB)) continue;
     
-    // Otherwise, patch up uses of the value with the appropriate LCSSA Phi,
-    // inserting PHI nodes into join points where needed.
-    Value *Val = GetValueForBlock(DT->getNode(UserBB), Instr, Phis);
-    
-    // Preincrement the iterator to avoid invalidating it when we change the
-    // value.
-    Use &U = UI.getUse();
-    ++UI;
-    U.set(Val);
-  }
-}
+    PHINode *PN = PHINode::Create(Inst->getType(), Inst->getName()+".lcssa",
+                                  ExitBB->begin());
+    PN->reserveOperandSpace(PredCache.GetNumPreds(ExitBB));
 
-/// getLoopValuesUsedOutsideLoop - Return any values defined in the loop that
-/// are used by instructions outside of it.
-void LCSSA::getLoopValuesUsedOutsideLoop(Loop *L,
-                                      SetVector<Instruction*> &AffectedValues,
-                                const SmallVector<BasicBlock*, 8>& exitBlocks) {
-  // FIXME: For large loops, we may be able to avoid a lot of use-scanning
-  // by using dominance information.  In particular, if a block does not
-  // dominate any of the loop exits, then none of the values defined in the
-  // block could be used outside the loop.
-  for (Loop::block_iterator BB = L->block_begin(), BE = L->block_end();
-       BB != BE; ++BB) {
-    for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ++I)
-      for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE;
-           ++UI) {
-        BasicBlock *UserBB = cast<Instruction>(*UI)->getParent();
-        if (PHINode* p = dyn_cast<PHINode>(*UI)) {
-          UserBB = p->getIncomingBlock(UI);
-        }
-        
-        if (*BB != UserBB && !inLoop(UserBB)) {
-          AffectedValues.insert(I);
-          break;
-        }
-      }
+    // Add inputs from inside the loop for this PHI.
+    for (BasicBlock **PI = PredCache.GetPreds(ExitBB); *PI; ++PI)
+      PN->addIncoming(Inst, *PI);
+    
+    // Remember that this phi makes the value alive in this block.
+    SSAUpdate.AddAvailableValue(ExitBB, PN);
   }
-}
-
-/// GetValueForBlock - Get the value to use within the specified basic block.
-/// available values are in Phis.
-Value *LCSSA::GetValueForBlock(DomTreeNode *BB, Instruction *OrigInst,
-                               DenseMap<DomTreeNode*, Value*> &Phis) {
-  // If there is no dominator info for this BB, it is unreachable.
-  if (BB == 0)
-    return UndefValue::get(OrigInst->getType());
-                                 
-  // If we have already computed this value, return the previously computed val.
-  if (Phis.count(BB)) return Phis[BB];
-
-  DomTreeNode *IDom = BB->getIDom();
+  
+  // Rewrite all uses outside the loop in terms of the new PHIs we just
+  // inserted.
+  for (unsigned i = 0, e = UsesToRewrite.size(); i != e; ++i) {
+    // If this use is in an exit block, rewrite to use the newly inserted PHI.
+    // This is required for correctness because SSAUpdate doesn't handle uses in
+    // the same block.  It assumes the PHI we inserted is at the end of the
+    // block.
+    Instruction *User = cast<Instruction>(UsesToRewrite[i]->getUser());
+    BasicBlock *UserBB = User->getParent();
+    if (PHINode *PN = dyn_cast<PHINode>(User))
+      UserBB = PN->getIncomingBlock(*UsesToRewrite[i]);
 
-  // Otherwise, there are two cases: we either have to insert a PHI node or we
-  // don't.  We need to insert a PHI node if this block is not dominated by one
-  // of the exit nodes from the loop (the loop could have multiple exits, and
-  // though the value defined *inside* the loop dominated all its uses, each
-  // exit by itself may not dominate all the uses).
-  //
-  // The simplest way to check for this condition is by checking to see if the
-  // idom is in the loop.  If so, we *know* that none of the exit blocks
-  // dominate this block.  Note that we *know* that the block defining the
-  // original instruction is in the idom chain, because if it weren't, then the
-  // original value didn't dominate this use.
-  if (!inLoop(IDom->getBlock())) {
-    // Idom is not in the loop, we must still be "below" the exit block and must
-    // be fully dominated by the value live in the idom.
-    Value* val = GetValueForBlock(IDom, OrigInst, Phis);
-    Phis.insert(std::make_pair(BB, val));
-    return val;
+    if (isa<PHINode>(UserBB->begin()) &&
+        isExitBlock(UserBB, ExitBlocks)) {
+      UsesToRewrite[i]->set(UserBB->begin());
+      continue;
+    }
+    
+    // Otherwise, do full PHI insertion.
+    SSAUpdate.RewriteUse(*UsesToRewrite[i]);
   }
   
-  BasicBlock *BBN = BB->getBlock();
-  
-  // Otherwise, the idom is the loop, so we need to insert a PHI node.  Do so
-  // now, then get values to fill in the incoming values for the PHI.
-  PHINode *PN = PHINode::Create(OrigInst->getType(),
-                                OrigInst->getName() + ".lcssa", BBN->begin());
-  PN->reserveOperandSpace(PredCache.GetNumPreds(BBN));
-  Phis.insert(std::make_pair(BB, PN));
-                                 
-  // Fill in the incoming values for the block.
-  for (BasicBlock** PI = PredCache.GetPreds(BBN); *PI; ++PI)
-    PN->addIncoming(GetValueForBlock(DT->getNode(*PI), OrigInst, Phis), *PI);
-  return PN;
+  return true;
 }
 
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 8c08638..b622611 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -20,9 +20,11 @@
 #include "llvm/Instructions.h"
 #include "llvm/Intrinsics.h"
 #include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/ProfileInfo.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/MathExtras.h"
@@ -183,8 +185,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB) {
     } else if (SI->getNumSuccessors() == 2) {
       // Otherwise, we can fold this switch into a conditional branch
       // instruction if it has only one non-default destination.
-      Value *Cond = new ICmpInst(ICmpInst::ICMP_EQ, SI->getCondition(),
-                                 SI->getSuccessorValue(1), "cond", SI);
+      Value *Cond = new ICmpInst(SI, ICmpInst::ICMP_EQ, SI->getCondition(),
+                                 SI->getSuccessorValue(1), "cond");
       // Insert the new branch...
       BranchInst::Create(SI->getSuccessor(1), SI->getSuccessor(0), Cond, SI);
 
@@ -262,7 +264,6 @@ void llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V) {
 /// too, recursively.
 void
 llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) {
-
   // We can remove a PHI if it is on a cycle in the def-use graph
   // where each node in the cycle has degree one, i.e. only one use,
   // and is an instruction with no side effects.
@@ -294,7 +295,7 @@ llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) {
 /// between them, moving the instructions in the predecessor into DestBB and
 /// deleting the predecessor block.
 ///
-void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB) {
+void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
   // If BB has single-entry PHI nodes, fold them.
   while (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) {
     Value *NewVal = PN->getIncomingValue(0);
@@ -314,6 +315,13 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB) {
   // Anything that branched to PredBB now branches to DestBB.
   PredBB->replaceAllUsesWith(DestBB);
   
+  if (P) {
+    ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>();
+    if (PI) {
+      PI->replaceAllUses(PredBB, DestBB);
+      PI->removeEdge(ProfileInfo::getEdge(PredBB, DestBB));
+    }
+  }
   // Nuke BB.
   PredBB->eraseFromParent();
 }
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index d6b167f..c22708a 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -37,10 +37,12 @@
 #include "llvm/Constants.h"
 #include "llvm/Instructions.h"
 #include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Type.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Support/CFG.h"
@@ -55,44 +57,42 @@ STATISTIC(NumInserted, "Number of pre-header or exit blocks inserted");
 STATISTIC(NumNested  , "Number of nested loops split out");
 
 namespace {
-  struct VISIBILITY_HIDDEN LoopSimplify : public FunctionPass {
+  struct VISIBILITY_HIDDEN LoopSimplify : public LoopPass {
     static char ID; // Pass identification, replacement for typeid
-    LoopSimplify() : FunctionPass(&ID) {}
+    LoopSimplify() : LoopPass(&ID) {}
 
     // AA - If we have an alias analysis object to update, this is it, otherwise
     // this is null.
     AliasAnalysis *AA;
     LoopInfo *LI;
     DominatorTree *DT;
-    virtual bool runOnFunction(Function &F);
+    Loop *L;
+    virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
 
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       // We need loop information to identify the loops...
-      AU.addRequired<LoopInfo>();
-      AU.addRequired<DominatorTree>();
+      AU.addRequiredTransitive<LoopInfo>();
+      AU.addRequiredTransitive<DominatorTree>();
 
       AU.addPreserved<LoopInfo>();
       AU.addPreserved<DominatorTree>();
       AU.addPreserved<DominanceFrontier>();
       AU.addPreserved<AliasAnalysis>();
+      AU.addPreserved<ScalarEvolution>();
       AU.addPreservedID(BreakCriticalEdgesID);  // No critical edges added.
     }
 
     /// verifyAnalysis() - Verify loop nest.
     void verifyAnalysis() const {
-#ifndef NDEBUG
-      LoopInfo *NLI = &getAnalysis<LoopInfo>();
-      for (LoopInfo::iterator I = NLI->begin(), E = NLI->end(); I != E; ++I) 
-        (*I)->verifyLoop();
-#endif  
+      assert(L->isLoopSimplifyForm() && "LoopSimplify form not preserved!");
     }
 
   private:
-    bool ProcessLoop(Loop *L);
+    bool ProcessLoop(Loop *L, LPPassManager &LPM);
     BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit);
-    void InsertPreheaderForLoop(Loop *L);
-    Loop *SeparateNestedLoop(Loop *L);
-    void InsertUniqueBackedgeBlock(Loop *L);
+    BasicBlock *InsertPreheaderForLoop(Loop *L);
+    Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM);
+    void InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader);
     void PlaceSplitBlockCarefully(BasicBlock *NewBB,
                                   SmallVectorImpl<BasicBlock*> &SplitPreds,
                                   Loop *L);
@@ -105,73 +105,19 @@ X("loopsimplify", "Canonicalize natural loops", true);
 
 // Publically exposed interface to pass...
 const PassInfo *const llvm::LoopSimplifyID = &X;
-FunctionPass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
+Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
 
 /// runOnFunction - Run down all loops in the CFG (recursively, but we could do
 /// it in any convenient order) inserting preheaders...
 ///
-bool LoopSimplify::runOnFunction(Function &F) {
+bool LoopSimplify::runOnLoop(Loop *l, LPPassManager &LPM) {
+  L = l;
   bool Changed = false;
   LI = &getAnalysis<LoopInfo>();
   AA = getAnalysisIfAvailable<AliasAnalysis>();
   DT = &getAnalysis<DominatorTree>();
 
-  // Check to see that no blocks (other than the header) in loops have
-  // predecessors that are not in loops.  This is not valid for natural loops,
-  // but can occur if the blocks are unreachable.  Since they are unreachable we
-  // can just shamelessly destroy their terminators to make them not branch into
-  // the loop!
-  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
-    // This case can only occur for unreachable blocks.  Blocks that are
-    // unreachable can't be in loops, so filter those blocks out.
-    if (LI->getLoopFor(BB)) continue;
-    
-    bool BlockUnreachable = false;
-    TerminatorInst *TI = BB->getTerminator();
-
-    // Check to see if any successors of this block are non-loop-header loops
-    // that are not the header.
-    for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
-      // If this successor is not in a loop, BB is clearly ok.
-      Loop *L = LI->getLoopFor(TI->getSuccessor(i));
-      if (!L) continue;
-      
-      // If the succ is the loop header, and if L is a top-level loop, then this
-      // is an entrance into a loop through the header, which is also ok.
-      if (L->getHeader() == TI->getSuccessor(i) && L->getParentLoop() == 0)
-        continue;
-      
-      // Otherwise, this is an entrance into a loop from some place invalid.
-      // Either the loop structure is invalid and this is not a natural loop (in
-      // which case the compiler is buggy somewhere else) or BB is unreachable.
-      BlockUnreachable = true;
-      break;
-    }
-    
-    // If this block is ok, check the next one.
-    if (!BlockUnreachable) continue;
-    
-    // Otherwise, this block is dead.  To clean up the CFG and to allow later
-    // loop transformations to ignore this case, we delete the edges into the
-    // loop by replacing the terminator.
-    
-    // Remove PHI entries from the successors.
-    for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
-      TI->getSuccessor(i)->removePredecessor(BB);
-   
-    // Add a new unreachable instruction before the old terminator.
-    new UnreachableInst(TI);
-    
-    // Delete the dead terminator.
-    if (AA) AA->deleteValue(TI);
-    if (!TI->use_empty())
-      TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
-    TI->eraseFromParent();
-    Changed |= true;
-  }
-  
-  for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
-    Changed |= ProcessLoop(*I);
+  Changed |= ProcessLoop(L, LPM);
 
   return Changed;
 }
@@ -179,21 +125,42 @@ bool LoopSimplify::runOnFunction(Function &F) {
 /// ProcessLoop - Walk the loop structure in depth first order, ensuring that
 /// all loops have preheaders.
 ///
-bool LoopSimplify::ProcessLoop(Loop *L) {
+bool LoopSimplify::ProcessLoop(Loop *L, LPPassManager &LPM) {
   bool Changed = false;
 ReprocessLoop:
-  
-  // Canonicalize inner loops before outer loops.  Inner loop canonicalization
-  // can provide work for the outer loop to canonicalize.
-  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
-    Changed |= ProcessLoop(*I);
-  
-  assert(L->getBlocks()[0] == L->getHeader() &&
-         "Header isn't first block in loop?");
+
+  // Check to see that no blocks (other than the header) in this loop that has
+  // predecessors that are not in the loop.  This is not valid for natural
+  // loops, but can occur if the blocks are unreachable.  Since they are
+  // unreachable we can just shamelessly delete those CFG edges!
+  for (Loop::block_iterator BB = L->block_begin(), E = L->block_end();
+       BB != E; ++BB) {
+    if (*BB == L->getHeader()) continue;
+
+    SmallPtrSet<BasicBlock *, 4> BadPreds;
+    for (pred_iterator PI = pred_begin(*BB), PE = pred_end(*BB); PI != PE; ++PI)
+      if (!L->contains(*PI))
+        BadPreds.insert(*PI);
+
+    // Delete each unique out-of-loop (and thus dead) predecessor.
+    for (SmallPtrSet<BasicBlock *, 4>::iterator I = BadPreds.begin(),
+         E = BadPreds.end(); I != E; ++I) {
+      // Inform each successor of each dead pred.
+      for (succ_iterator SI = succ_begin(*I), SE = succ_end(*I); SI != SE; ++SI)
+        (*SI)->removePredecessor(*I);
+      // Zap the dead pred's terminator and replace it with unreachable.
+      TerminatorInst *TI = (*I)->getTerminator();
+       TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
+      (*I)->getTerminator()->eraseFromParent();
+      new UnreachableInst((*I)->getContext(), *I);
+      Changed = true;
+    }
+  }
 
   // Does the loop already have a preheader?  If so, don't insert one.
-  if (L->getLoopPreheader() == 0) {
-    InsertPreheaderForLoop(L);
+  BasicBlock *Preheader = L->getLoopPreheader();
+  if (!Preheader) {
+    Preheader = InsertPreheaderForLoop(L);
     NumInserted++;
     Changed = true;
   }
@@ -229,10 +196,9 @@ ReprocessLoop:
     // this for loops with a giant number of backedges, just factor them into a
     // common backedge instead.
     if (NumBackedges < 8) {
-      if (Loop *NL = SeparateNestedLoop(L)) {
+      if (SeparateNestedLoop(L, LPM)) {
         ++NumNested;
         // This is a big restructuring change, reprocess the whole loop.
-        ProcessLoop(NL);
         Changed = true;
         // GCC doesn't tail recursion eliminate this.
         goto ReprocessLoop;
@@ -242,7 +208,7 @@ ReprocessLoop:
     // If we either couldn't, or didn't want to, identify nesting of the loops,
     // insert a new block that all backedges target, then make it jump to the
     // loop header.
-    InsertUniqueBackedgeBlock(L);
+    InsertUniqueBackedgeBlock(L, Preheader);
     NumInserted++;
     Changed = true;
   }
@@ -253,7 +219,7 @@ ReprocessLoop:
   PHINode *PN;
   for (BasicBlock::iterator I = L->getHeader()->begin();
        (PN = dyn_cast<PHINode>(I++)); )
-    if (Value *V = PN->hasConstantValue()) {
+    if (Value *V = PN->hasConstantValue(DT)) {
       if (AA) AA->deleteValue(PN);
       PN->replaceAllUsesWith(V);
       PN->eraseFromParent();
@@ -286,19 +252,10 @@ ReprocessLoop:
         Instruction *Inst = I++;
         if (Inst == CI)
           continue;
-        if (Inst->isTrapping()) {
+        if (!L->makeLoopInvariant(Inst, Changed, Preheader->getTerminator())) {
           AllInvariant = false;
           break;
         }
-        for (unsigned j = 0, f = Inst->getNumOperands(); j != f; ++j)
-          if (!L->isLoopInvariant(Inst->getOperand(j))) {
-            AllInvariant = false;
-            break;
-          }
-        if (!AllInvariant)
-          break;
-        // Hoist.
-        Inst->moveBefore(L->getLoopPreheader()->getTerminator());
       }
       if (!AllInvariant) continue;
 
@@ -317,9 +274,10 @@ ReprocessLoop:
       DomTreeNode *Node = DT->getNode(ExitingBlock);
       const std::vector<DomTreeNodeBase<BasicBlock> *> &Children =
         Node->getChildren();
-      for (unsigned k = 0, g = Children.size(); k != g; ++k) {
-        DT->changeImmediateDominator(Children[k], Node->getIDom());
-        if (DF) DF->changeImmediateDominator(Children[k]->getBlock(),
+      while (!Children.empty()) {
+        DomTreeNode *Child = Children.front();
+        DT->changeImmediateDominator(Child, Node->getIDom());
+        if (DF) DF->changeImmediateDominator(Child->getBlock(),
                                              Node->getIDom()->getBlock(),
                                              DT);
       }
@@ -339,7 +297,7 @@ ReprocessLoop:
 /// preheader, this method is called to insert one.  This method has two phases:
 /// preheader insertion and analysis updating.
 ///
-void LoopSimplify::InsertPreheaderForLoop(Loop *L) {
+BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) {
   BasicBlock *Header = L->getHeader();
 
   // Compute the set of predecessors of the loop that are not in the loop.
@@ -353,19 +311,12 @@ void LoopSimplify::InsertPreheaderForLoop(Loop *L) {
   BasicBlock *NewBB =
     SplitBlockPredecessors(Header, &OutsideBlocks[0], OutsideBlocks.size(),
                            ".preheader", this);
-  
-
-  //===--------------------------------------------------------------------===//
-  //  Update analysis results now that we have performed the transformation
-  //
-
-  // We know that we have loop information to update... update it now.
-  if (Loop *Parent = L->getParentLoop())
-    Parent->addBasicBlockToLoop(NewBB, LI->getBase());
 
   // Make sure that NewBB is put someplace intelligent, which doesn't mess up
   // code layout too horribly.
   PlaceSplitBlockCarefully(NewBB, OutsideBlocks, L);
+
+  return NewBB;
 }
 
 /// RewriteLoopExitBlock - Ensure that the loop preheader dominates all exit
@@ -382,17 +333,6 @@ BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) {
                                              LoopBlocks.size(), ".loopexit",
                                              this);
 
-  // Update Loop Information - we know that the new block will be in whichever
-  // loop the Exit block is in.  Note that it may not be in that immediate loop,
-  // if the successor is some other loop header.  In that case, we continue 
-  // walking up the loop tree to find a loop that contains both the successor
-  // block and the predecessor block.
-  Loop *SuccLoop = LI->getLoopFor(Exit);
-  while (SuccLoop && !SuccLoop->contains(L->getHeader()))
-    SuccLoop = SuccLoop->getParentLoop();
-  if (SuccLoop)
-    SuccLoop->addBasicBlockToLoop(NewBB, LI->getBase());
-
   return NewBB;
 }
 
@@ -422,14 +362,13 @@ static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT,
   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) {
     PHINode *PN = cast<PHINode>(I);
     ++I;
-    if (Value *V = PN->hasConstantValue())
-      if (!isa<Instruction>(V) || DT->dominates(cast<Instruction>(V), PN)) {
-        // This is a degenerate PHI already, don't modify it!
-        PN->replaceAllUsesWith(V);
-        if (AA) AA->deleteValue(PN);
-        PN->eraseFromParent();
-        continue;
-      }
+    if (Value *V = PN->hasConstantValue(DT)) {
+      // This is a degenerate PHI already, don't modify it!
+      PN->replaceAllUsesWith(V);
+      if (AA) AA->deleteValue(PN);
+      PN->eraseFromParent();
+      continue;
+    }
 
     // Scan this PHI node looking for a use of the PHI node by itself.
     for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
@@ -496,7 +435,7 @@ void LoopSimplify::PlaceSplitBlockCarefully(BasicBlock *NewBB,
 /// If we are able to separate out a loop, return the new outer loop that was
 /// created.
 ///
-Loop *LoopSimplify::SeparateNestedLoop(Loop *L) {
+Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) {
   PHINode *PN = FindPHIToPartitionLoops(L, DT, AA);
   if (PN == 0) return 0;  // No known way to partition.
 
@@ -527,17 +466,20 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L) {
   else
     LI->changeTopLevelLoop(L, NewOuter);
 
-  // This block is going to be our new header block: add it to this loop and all
-  // parent loops.
-  NewOuter->addBasicBlockToLoop(NewBB, LI->getBase());
-
   // L is now a subloop of our outer loop.
   NewOuter->addChildLoop(L);
 
+  // Add the new loop to the pass manager queue.
+  LPM.insertLoopIntoQueue(NewOuter);
+
   for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
        I != E; ++I)
     NewOuter->addBlockEntry(*I);
 
+  // Now reset the header in L, which had been moved by
+  // SplitBlockPredecessors for the outer loop.
+  L->moveToHeader(Header);
+
   // Determine which blocks should stay in L and which should be moved out to
   // the Outer loop now.
   std::set<BasicBlock*> BlocksInL;
@@ -578,11 +520,10 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L) {
 /// backedges to target a new basic block and have that block branch to the loop
 /// header.  This ensures that loops have exactly one backedge.
 ///
-void LoopSimplify::InsertUniqueBackedgeBlock(Loop *L) {
+void LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) {
   assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!");
 
   // Get information about the loop
-  BasicBlock *Preheader = L->getLoopPreheader();
   BasicBlock *Header = L->getHeader();
   Function *F = Header->getParent();
 
@@ -592,7 +533,8 @@ void LoopSimplify::InsertUniqueBackedgeBlock(Loop *L) {
     if (*I != Preheader) BackedgeBlocks.push_back(*I);
 
   // Create and insert the new backedge block...
-  BasicBlock *BEBlock = BasicBlock::Create(Header->getName()+".backedge", F);
+  BasicBlock *BEBlock = BasicBlock::Create(Header->getContext(),
+                                           Header->getName()+".backedge", F);
   BranchInst *BETerminator = BranchInst::Create(Header, BEBlock);
 
   // Move the new backedge block to right after the last backedge block.
diff --git a/lib/Transforms/Utils/LowerAllocations.cpp b/lib/Transforms/Utils/LowerAllocations.cpp
index 74e7028..f26d7c1 100644
--- a/lib/Transforms/Utils/LowerAllocations.cpp
+++ b/lib/Transforms/Utils/LowerAllocations.cpp
@@ -19,6 +19,7 @@
 #include "llvm/DerivedTypes.h"
 #include "llvm/Instructions.h"
 #include "llvm/Constants.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Pass.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Target/TargetData.h"
@@ -28,17 +29,17 @@ using namespace llvm;
 STATISTIC(NumLowered, "Number of allocations lowered");
 
 namespace {
-  /// LowerAllocations - Turn malloc and free instructions into %malloc and
-  /// %free calls.
+  /// LowerAllocations - Turn malloc and free instructions into @malloc and
+  /// @free calls.
   ///
   class VISIBILITY_HIDDEN LowerAllocations : public BasicBlockPass {
-    Constant *MallocFunc;   // Functions in the module we are processing
-    Constant *FreeFunc;     // Initialized by doInitialization
+    Constant *FreeFunc;   // Functions in the module we are processing
+                          // Initialized by doInitialization
     bool LowerMallocArgToInteger;
   public:
     static char ID; // Pass ID, replacement for typeid
     explicit LowerAllocations(bool LowerToInt = false)
-      : BasicBlockPass(&ID), MallocFunc(0), FreeFunc(0), 
+      : BasicBlockPass(&ID), FreeFunc(0), 
         LowerMallocArgToInteger(LowerToInt) {}
 
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
@@ -86,12 +87,9 @@ Pass *llvm::createLowerAllocationsPass(bool LowerMallocArgToInteger) {
 // This function is always successful.
 //
 bool LowerAllocations::doInitialization(Module &M) {
-  const Type *BPTy = PointerType::getUnqual(Type::Int8Ty);
-  // Prototype malloc as "char* malloc(...)", because we don't know in
-  // doInitialization whether size_t is int or long.
-  FunctionType *FT = FunctionType::get(BPTy, true);
-  MallocFunc = M.getOrInsertFunction("malloc", FT);
-  FreeFunc = M.getOrInsertFunction("free"  , Type::VoidTy, BPTy, (Type *)0);
+  const Type *BPTy = Type::getInt8PtrTy(M.getContext());
+  FreeFunc = M.getOrInsertFunction("free"  , Type::getVoidTy(M.getContext()),
+                                   BPTy, (Type *)0);
   return true;
 }
 
@@ -100,57 +98,22 @@ bool LowerAllocations::doInitialization(Module &M) {
 //
 bool LowerAllocations::runOnBasicBlock(BasicBlock &BB) {
   bool Changed = false;
-  assert(MallocFunc && FreeFunc && "Pass not initialized!");
+  assert(FreeFunc && "Pass not initialized!");
 
   BasicBlock::InstListType &BBIL = BB.getInstList();
 
   const TargetData &TD = getAnalysis<TargetData>();
-  const Type *IntPtrTy = TD.getIntPtrType();
+  const Type *IntPtrTy = TD.getIntPtrType(BB.getContext());
 
   // Loop over all of the instructions, looking for malloc or free instructions
   for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) {
     if (MallocInst *MI = dyn_cast<MallocInst>(I)) {
-      const Type *AllocTy = MI->getType()->getElementType();
-
-      // malloc(type) becomes i8 *malloc(size)
-      Value *MallocArg;
-      if (LowerMallocArgToInteger)
-        MallocArg = ConstantInt::get(Type::Int64Ty,
-                                     TD.getTypeAllocSize(AllocTy));
-      else
-        MallocArg = ConstantExpr::getSizeOf(AllocTy);
-      MallocArg = ConstantExpr::getTruncOrBitCast(cast<Constant>(MallocArg), 
-                                                  IntPtrTy);
-
-      if (MI->isArrayAllocation()) {
-        if (isa<ConstantInt>(MallocArg) &&
-            cast<ConstantInt>(MallocArg)->isOne()) {
-          MallocArg = MI->getOperand(0);         // Operand * 1 = Operand
-        } else if (Constant *CO = dyn_cast<Constant>(MI->getOperand(0))) {
-          CO = ConstantExpr::getIntegerCast(CO, IntPtrTy, false /*ZExt*/);
-          MallocArg = ConstantExpr::getMul(CO, cast<Constant>(MallocArg));
-        } else {
-          Value *Scale = MI->getOperand(0);
-          if (Scale->getType() != IntPtrTy)
-            Scale = CastInst::CreateIntegerCast(Scale, IntPtrTy, false /*ZExt*/,
-                                                "", I);
-
-          // Multiply it by the array size if necessary...
-          MallocArg = BinaryOperator::Create(Instruction::Mul, Scale,
-                                             MallocArg, "", I);
-        }
-      }
-
-      // Create the call to Malloc.
-      CallInst *MCall = CallInst::Create(MallocFunc, MallocArg, "", I);
-      MCall->setTailCall();
-
-      // Create a cast instruction to convert to the right type...
-      Value *MCast;
-      if (MCall->getType() != Type::VoidTy)
-        MCast = new BitCastInst(MCall, MI->getType(), "", I);
-      else
-        MCast = Constant::getNullValue(MI->getType());
+      Value *ArraySize = MI->getOperand(0);
+      if (ArraySize->getType() != IntPtrTy)
+        ArraySize = CastInst::CreateIntegerCast(ArraySize, IntPtrTy,
+                                                false /*ZExt*/, "", I);
+      Value *MCast = CallInst::CreateMalloc(I, IntPtrTy,
+                                            MI->getAllocatedType(), ArraySize);
 
       // Replace all uses of the old malloc inst with the cast inst
       MI->replaceAllUsesWith(MCast);
@@ -160,7 +123,7 @@ bool LowerAllocations::runOnBasicBlock(BasicBlock &BB) {
     } else if (FreeInst *FI = dyn_cast<FreeInst>(I)) {
       Value *PtrCast = 
         new BitCastInst(FI->getOperand(0),
-                        PointerType::getUnqual(Type::Int8Ty), "", I);
+               Type::getInt8PtrTy(BB.getContext()), "", I);
 
       // Insert a call to the free function...
       CallInst::Create(FreeFunc, PtrCast, "", I)->setTailCall();
diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp
index 1f6b1a2..9a3de26 100644
--- a/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/lib/Transforms/Utils/LowerInvoke.cpp
@@ -40,6 +40,7 @@
 #include "llvm/DerivedTypes.h"
 #include "llvm/Instructions.h"
 #include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -114,7 +115,8 @@ FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI) {
 // doInitialization - Make sure that there is a prototype for abort in the
 // current module.
 bool LowerInvoke::doInitialization(Module &M) {
-  const Type *VoidPtrTy = PointerType::getUnqual(Type::Int8Ty);
+  const Type *VoidPtrTy =
+          Type::getInt8PtrTy(M.getContext());
   AbortMessage = 0;
   if (ExpensiveEHSupport) {
     // Insert a type for the linked list of jump buffers.
@@ -125,9 +127,9 @@ bool LowerInvoke::doInitialization(Module &M) {
     { // The type is recursive, so use a type holder.
       std::vector<const Type*> Elements;
       Elements.push_back(JmpBufTy);
-      OpaqueType *OT = OpaqueType::get();
+      OpaqueType *OT = OpaqueType::get(M.getContext());
       Elements.push_back(PointerType::getUnqual(OT));
-      PATypeHolder JBLType(StructType::get(Elements));
+      PATypeHolder JBLType(StructType::get(M.getContext(), Elements));
       OT->refineAbstractTypeTo(JBLType.get());  // Complete the cycle.
       JBLinkTy = JBLType.get();
       M.addTypeName("llvm.sjljeh.jmpbufty", JBLinkTy);
@@ -138,10 +140,10 @@ bool LowerInvoke::doInitialization(Module &M) {
     // Now that we've done that, insert the jmpbuf list head global, unless it
     // already exists.
     if (!(JBListHead = M.getGlobalVariable("llvm.sjljeh.jblist", PtrJBList))) {
-      JBListHead = new GlobalVariable(PtrJBList, false,
+      JBListHead = new GlobalVariable(M, PtrJBList, false,
                                       GlobalValue::LinkOnceAnyLinkage,
                                       Constant::getNullValue(PtrJBList),
-                                      "llvm.sjljeh.jblist", &M);
+                                      "llvm.sjljeh.jblist");
     }
 
 // VisualStudio defines setjmp as _setjmp via #include <csetjmp> / <setjmp.h>,
@@ -163,7 +165,8 @@ bool LowerInvoke::doInitialization(Module &M) {
   }
 
   // We need the 'write' and 'abort' functions for both models.
-  AbortFn = M.getOrInsertFunction("abort", Type::VoidTy, (Type *)0);
+  AbortFn = M.getOrInsertFunction("abort", Type::getVoidTy(M.getContext()),
+                                  (Type *)0);
 #if 0 // "write" is Unix-specific.. code is going away soon anyway.
   WriteFn = M.getOrInsertFunction("write", Type::VoidTy, Type::Int32Ty,
                                   VoidPtrTy, Type::Int32Ty, (Type *)0);
@@ -178,26 +181,30 @@ void LowerInvoke::createAbortMessage(Module *M) {
     // The abort message for expensive EH support tells the user that the
     // program 'unwound' without an 'invoke' instruction.
     Constant *Msg =
-      ConstantArray::get("ERROR: Exception thrown, but not caught!\n");
+      ConstantArray::get(M->getContext(),
+                         "ERROR: Exception thrown, but not caught!\n");
     AbortMessageLength = Msg->getNumOperands()-1;  // don't include \0
 
-    GlobalVariable *MsgGV = new GlobalVariable(Msg->getType(), true,
+    GlobalVariable *MsgGV = new GlobalVariable(*M, Msg->getType(), true,
                                                GlobalValue::InternalLinkage,
-                                               Msg, "abortmsg", M);
-    std::vector<Constant*> GEPIdx(2, Constant::getNullValue(Type::Int32Ty));
+                                               Msg, "abortmsg");
+    std::vector<Constant*> GEPIdx(2,
+                     Constant::getNullValue(Type::getInt32Ty(M->getContext())));
     AbortMessage = ConstantExpr::getGetElementPtr(MsgGV, &GEPIdx[0], 2);
   } else {
     // The abort message for cheap EH support tells the user that EH is not
     // enabled.
     Constant *Msg =
-      ConstantArray::get("Exception handler needed, but not enabled.  Recompile"
-                         " program with -enable-correct-eh-support.\n");
+      ConstantArray::get(M->getContext(), 
+                        "Exception handler needed, but not enabled."      
+                        "Recompile program with -enable-correct-eh-support.\n");
     AbortMessageLength = Msg->getNumOperands()-1;  // don't include \0
 
-    GlobalVariable *MsgGV = new GlobalVariable(Msg->getType(), true,
+    GlobalVariable *MsgGV = new GlobalVariable(*M, Msg->getType(), true,
                                                GlobalValue::InternalLinkage,
-                                               Msg, "abortmsg", M);
-    std::vector<Constant*> GEPIdx(2, Constant::getNullValue(Type::Int32Ty));
+                                               Msg, "abortmsg");
+    std::vector<Constant*> GEPIdx(2, Constant::getNullValue(
+                                            Type::getInt32Ty(M->getContext())));
     AbortMessage = ConstantExpr::getGetElementPtr(MsgGV, &GEPIdx[0], 2);
   }
 }
@@ -249,8 +256,9 @@ bool LowerInvoke::insertCheapEHSupport(Function &F) {
 
       // Insert a return instruction.  This really should be a "barrier", as it
       // is unreachable.
-      ReturnInst::Create(F.getReturnType() == Type::VoidTy ? 0 :
-                         Constant::getNullValue(F.getReturnType()), UI);
+      ReturnInst::Create(F.getContext(),
+                         F.getReturnType() == Type::getVoidTy(F.getContext()) ?
+                          0 : Constant::getNullValue(F.getReturnType()), UI);
 
       // Remove the unwind instruction now.
       BB->getInstList().erase(UI);
@@ -265,7 +273,8 @@ bool LowerInvoke::insertCheapEHSupport(Function &F) {
 void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo,
                                          AllocaInst *InvokeNum,
                                          SwitchInst *CatchSwitch) {
-  ConstantInt *InvokeNoC = ConstantInt::get(Type::Int32Ty, InvokeNo);
+  ConstantInt *InvokeNoC = ConstantInt::get(Type::getInt32Ty(II->getContext()),
+                                            InvokeNo);
 
   // If the unwind edge has phi nodes, split the edge.
   if (isa<PHINode>(II->getUnwindDest()->begin())) {
@@ -284,7 +293,8 @@ void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo,
 
   BasicBlock::iterator NI = II->getNormalDest()->getFirstNonPHI();
   // nonvolatile.
-  new StoreInst(Constant::getNullValue(Type::Int32Ty), InvokeNum, false, NI);
+  new StoreInst(Constant::getNullValue(Type::getInt32Ty(II->getContext())), 
+                InvokeNum, false, NI);
 
   // Add a switch case to our unwind block.
   CatchSwitch->addCase(InvokeNoC, II->getUnwindDest());
@@ -469,13 +479,15 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
     // alloca because the value needs to be live across invokes.
     unsigned Align = TLI ? TLI->getJumpBufAlignment() : 0;
     AllocaInst *JmpBuf =
-      new AllocaInst(JBLinkTy, 0, Align, "jblink", F.begin()->begin());
+      new AllocaInst(JBLinkTy, 0, Align,
+                     "jblink", F.begin()->begin());
 
     std::vector<Value*> Idx;
-    Idx.push_back(Constant::getNullValue(Type::Int32Ty));
-    Idx.push_back(ConstantInt::get(Type::Int32Ty, 1));
+    Idx.push_back(Constant::getNullValue(Type::getInt32Ty(F.getContext())));
+    Idx.push_back(ConstantInt::get(Type::getInt32Ty(F.getContext()), 1));
     OldJmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx.begin(), Idx.end(),
-                                             "OldBuf", EntryBB->getTerminator());
+                                             "OldBuf",
+                                              EntryBB->getTerminator());
 
     // Copy the JBListHead to the alloca.
     Value *OldBuf = new LoadInst(JBListHead, "oldjmpbufptr", true,
@@ -487,20 +499,21 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
 
     // Create the catch block.  The catch block is basically a big switch
     // statement that goes to all of the invoke catch blocks.
-    BasicBlock *CatchBB = BasicBlock::Create("setjmp.catch", &F);
+    BasicBlock *CatchBB =
+            BasicBlock::Create(F.getContext(), "setjmp.catch", &F);
 
     // Create an alloca which keeps track of which invoke is currently
     // executing.  For normal calls it contains zero.
-    AllocaInst *InvokeNum = new AllocaInst(Type::Int32Ty, 0, "invokenum",
-                                           EntryBB->begin());
-    new StoreInst(ConstantInt::get(Type::Int32Ty, 0), InvokeNum, true,
-                  EntryBB->getTerminator());
+    AllocaInst *InvokeNum = new AllocaInst(Type::getInt32Ty(F.getContext()), 0,
+                                           "invokenum",EntryBB->begin());
+    new StoreInst(ConstantInt::get(Type::getInt32Ty(F.getContext()), 0), 
+                  InvokeNum, true, EntryBB->getTerminator());
 
     // Insert a load in the Catch block, and a switch on its value.  By default,
     // we go to a block that just does an unwind (which is the correct action
     // for a standard call).
-    BasicBlock *UnwindBB = BasicBlock::Create("unwindbb", &F);
-    Unwinds.push_back(new UnwindInst(UnwindBB));
+    BasicBlock *UnwindBB = BasicBlock::Create(F.getContext(), "unwindbb", &F);
+    Unwinds.push_back(new UnwindInst(F.getContext(), UnwindBB));
 
     Value *CatchLoad = new LoadInst(InvokeNum, "invoke.num", true, CatchBB);
     SwitchInst *CatchSwitch =
@@ -512,19 +525,21 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
     BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(),
                                                      "setjmp.cont");
 
-    Idx[1] = ConstantInt::get(Type::Int32Ty, 0);
+    Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 0);
     Value *JmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx.begin(), Idx.end(),
                                                  "TheJmpBuf",
                                                  EntryBB->getTerminator());
-    JmpBufPtr = new BitCastInst(JmpBufPtr, PointerType::getUnqual(Type::Int8Ty),
+    JmpBufPtr = new BitCastInst(JmpBufPtr,
+                        Type::getInt8PtrTy(F.getContext()),
                                 "tmp", EntryBB->getTerminator());
     Value *SJRet = CallInst::Create(SetJmpFn, JmpBufPtr, "sjret",
                                     EntryBB->getTerminator());
 
     // Compare the return value to zero.
-    Value *IsNormal = new ICmpInst(ICmpInst::ICMP_EQ, SJRet,
+    Value *IsNormal = new ICmpInst(EntryBB->getTerminator(),
+                                   ICmpInst::ICMP_EQ, SJRet,
                                    Constant::getNullValue(SJRet->getType()),
-      "notunwind", EntryBB->getTerminator());
+                                   "notunwind");
     // Nuke the uncond branch.
     EntryBB->getTerminator()->eraseFromParent();
 
@@ -541,9 +556,10 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
   // Create three new blocks, the block to load the jmpbuf ptr and compare
   // against null, the block to do the longjmp, and the error block for if it
   // is null.  Add them at the end of the function because they are not hot.
-  BasicBlock *UnwindHandler = BasicBlock::Create("dounwind", &F);
-  BasicBlock *UnwindBlock = BasicBlock::Create("unwind", &F);
-  BasicBlock *TermBlock = BasicBlock::Create("unwinderror", &F);
+  BasicBlock *UnwindHandler = BasicBlock::Create(F.getContext(),
+                                                "dounwind", &F);
+  BasicBlock *UnwindBlock = BasicBlock::Create(F.getContext(), "unwind", &F);
+  BasicBlock *TermBlock = BasicBlock::Create(F.getContext(), "unwinderror", &F);
 
   // If this function contains an invoke, restore the old jumpbuf ptr.
   Value *BufPtr;
@@ -556,26 +572,27 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
   }
 
   // Load the JBList, if it's null, then there was no catch!
-  Value *NotNull = new ICmpInst(ICmpInst::ICMP_NE, BufPtr,
+  Value *NotNull = new ICmpInst(*UnwindHandler, ICmpInst::ICMP_NE, BufPtr,
                                 Constant::getNullValue(BufPtr->getType()),
-    "notnull", UnwindHandler);
+                                "notnull");
   BranchInst::Create(UnwindBlock, TermBlock, NotNull, UnwindHandler);
 
   // Create the block to do the longjmp.
   // Get a pointer to the jmpbuf and longjmp.
   std::vector<Value*> Idx;
-  Idx.push_back(Constant::getNullValue(Type::Int32Ty));
-  Idx.push_back(ConstantInt::get(Type::Int32Ty, 0));
+  Idx.push_back(Constant::getNullValue(Type::getInt32Ty(F.getContext())));
+  Idx.push_back(ConstantInt::get(Type::getInt32Ty(F.getContext()), 0));
   Idx[0] = GetElementPtrInst::Create(BufPtr, Idx.begin(), Idx.end(), "JmpBuf",
                                      UnwindBlock);
-  Idx[0] = new BitCastInst(Idx[0], PointerType::getUnqual(Type::Int8Ty),
+  Idx[0] = new BitCastInst(Idx[0],
+             Type::getInt8PtrTy(F.getContext()),
                            "tmp", UnwindBlock);
-  Idx[1] = ConstantInt::get(Type::Int32Ty, 1);
+  Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1);
   CallInst::Create(LongJmpFn, Idx.begin(), Idx.end(), "", UnwindBlock);
-  new UnreachableInst(UnwindBlock);
+  new UnreachableInst(F.getContext(), UnwindBlock);
 
   // Set up the term block ("throw without a catch").
-  new UnreachableInst(TermBlock);
+  new UnreachableInst(F.getContext(), TermBlock);
 
   // Insert a new call to write(2, AbortMessage, AbortMessageLength);
   writeAbortMessage(TermBlock->getTerminator());
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index 1da5936..764f098 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -18,6 +18,7 @@
 #include "llvm/Constants.h"
 #include "llvm/Function.h"
 #include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Pass.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Debug.h"
@@ -108,8 +109,10 @@ bool LowerSwitch::runOnFunction(Function &F) {
 
 // operator<< - Used for debugging purposes.
 //
-static std::ostream& operator<<(std::ostream &O,
-                                const LowerSwitch::CaseVector &C) {
+static raw_ostream& operator<<(raw_ostream &O,
+                               const LowerSwitch::CaseVector &C) ATTRIBUTE_USED;
+static raw_ostream& operator<<(raw_ostream &O,
+                               const LowerSwitch::CaseVector &C) {
   O << "[";
 
   for (LowerSwitch::CaseVector::const_iterator B = C.begin(),
@@ -121,11 +124,6 @@ static std::ostream& operator<<(std::ostream &O,
   return O << "]";
 }
 
-static OStream& operator<<(OStream &O, const LowerSwitch::CaseVector &C) {
-  if (O.stream()) *O.stream() << C;
-  return O;
-}
-
 // switchConvert - Convert the switch statement into a binary lookup of
 // the case values. The function recursively builds this tree.
 //
@@ -140,9 +138,9 @@ BasicBlock* LowerSwitch::switchConvert(CaseItr Begin, CaseItr End,
 
   unsigned Mid = Size / 2;
   std::vector<CaseRange> LHS(Begin, Begin + Mid);
-  DOUT << "LHS: " << LHS << "\n";
+  DEBUG(errs() << "LHS: " << LHS << "\n");
   std::vector<CaseRange> RHS(Begin + Mid, End);
-  DOUT << "RHS: " << RHS << "\n";
+  DEBUG(errs() << "RHS: " << RHS << "\n");
 
   CaseRange& Pivot = *(Begin + Mid);
   DEBUG(errs() << "Pivot ==> " 
@@ -157,11 +155,12 @@ BasicBlock* LowerSwitch::switchConvert(CaseItr Begin, CaseItr End,
   // Create a new node that checks if the value is < pivot. Go to the
   // left branch if it is and right branch if not.
   Function* F = OrigBlock->getParent();
-  BasicBlock* NewNode = BasicBlock::Create("NodeBlock");
+  BasicBlock* NewNode = BasicBlock::Create(Val->getContext(), "NodeBlock");
   Function::iterator FI = OrigBlock;
   F->getBasicBlockList().insert(++FI, NewNode);
 
-  ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_SLT, Val, Pivot.Low, "Pivot");
+  ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_SLT,
+                                Val, Pivot.Low, "Pivot");
   NewNode->getInstList().push_back(Comp);
   BranchInst::Create(LBranch, RBranch, Comp, NewNode);
   return NewNode;
@@ -178,7 +177,7 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
                                       BasicBlock* Default)
 {
   Function* F = OrigBlock->getParent();
-  BasicBlock* NewLeaf = BasicBlock::Create("LeafBlock");
+  BasicBlock* NewLeaf = BasicBlock::Create(Val->getContext(), "LeafBlock");
   Function::iterator FI = OrigBlock;
   F->getBasicBlockList().insert(++FI, NewLeaf);
 
@@ -186,18 +185,18 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
   ICmpInst* Comp = NULL;
   if (Leaf.Low == Leaf.High) {
     // Make the seteq instruction...
-    Comp = new ICmpInst(ICmpInst::ICMP_EQ, Val, Leaf.Low,
-                        "SwitchLeaf", NewLeaf);
+    Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_EQ, Val,
+                        Leaf.Low, "SwitchLeaf");
   } else {
     // Make range comparison
     if (cast<ConstantInt>(Leaf.Low)->isMinValue(true /*isSigned*/)) {
       // Val >= Min && Val <= Hi --> Val <= Hi
-      Comp = new ICmpInst(ICmpInst::ICMP_SLE, Val, Leaf.High,
-                          "SwitchLeaf", NewLeaf);
+      Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SLE, Val, Leaf.High,
+                          "SwitchLeaf");
     } else if (cast<ConstantInt>(Leaf.Low)->isZero()) {
       // Val >= 0 && Val <= Hi --> Val <=u Hi
-      Comp = new ICmpInst(ICmpInst::ICMP_ULE, Val, Leaf.High,
-                          "SwitchLeaf", NewLeaf);      
+      Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Val, Leaf.High,
+                          "SwitchLeaf");      
     } else {
       // Emit V-Lo <=u Hi-Lo
       Constant* NegLo = ConstantExpr::getNeg(Leaf.Low);
@@ -205,8 +204,8 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
                                                    Val->getName()+".off",
                                                    NewLeaf);
       Constant *UpperBound = ConstantExpr::getAdd(NegLo, Leaf.High);
-      Comp = new ICmpInst(ICmpInst::ICMP_ULE, Add, UpperBound,
-                          "SwitchLeaf", NewLeaf);
+      Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Add, UpperBound,
+                          "SwitchLeaf");
     }
   }
 
@@ -290,7 +289,7 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) {
 
   // Create a new, empty default block so that the new hierarchy of
   // if-then statements go to this and the PHI nodes are happy.
-  BasicBlock* NewDefault = BasicBlock::Create("NewDefault");
+  BasicBlock* NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault");
   F->getBasicBlockList().insert(Default, NewDefault);
 
   BranchInst::Create(Default, NewDefault);
@@ -308,9 +307,10 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) {
   CaseVector Cases;
   unsigned numCmps = Clusterify(Cases, SI);
 
-  DOUT << "Clusterify finished. Total clusters: " << Cases.size()
-       << ". Total compares: " << numCmps << "\n";
-  DOUT << "Cases: " << Cases << "\n";
+  DEBUG(errs() << "Clusterify finished. Total clusters: " << Cases.size()
+               << ". Total compares: " << numCmps << "\n");
+  DEBUG(errs() << "Cases: " << Cases << "\n");
+  (void)numCmps;
   
   BasicBlock* SwitchBlock = switchConvert(Cases.begin(), Cases.end(), Val,
                                           OrigBlock, NewDefault);
diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp
index 2b06d77..5df0832 100644
--- a/lib/Transforms/Utils/Mem2Reg.cpp
+++ b/lib/Transforms/Utils/Mem2Reg.cpp
@@ -75,7 +75,7 @@ bool PromotePass::runOnFunction(Function &F) {
 
     if (Allocas.empty()) break;
 
-    PromoteMemToReg(Allocas, DT, DF);
+    PromoteMemToReg(Allocas, DT, DF, F.getContext());
     NumPromoted += Allocas.size();
     Changed = true;
   }
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index b717699..9ca06bd 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -23,13 +23,13 @@
 #include "llvm/Function.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/AliasSetTracker.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/Compiler.h"
@@ -41,7 +41,6 @@ STATISTIC(NumSingleStore,   "Number of alloca's promoted with a single store");
 STATISTIC(NumDeadAlloca,    "Number of dead alloca's removed");
 STATISTIC(NumPHIInsert,     "Number of PHI nodes inserted");
 
-// Provide DenseMapInfo for all pointers.
 namespace llvm {
 template<>
 struct DenseMapInfo<std::pair<BasicBlock*, unsigned> > {
@@ -181,6 +180,8 @@ namespace {
     /// AST - An AliasSetTracker object to update.  If null, don't update it.
     ///
     AliasSetTracker *AST;
+    
+    LLVMContext &Context;
 
     /// AllocaLookup - Reverse mapping of Allocas.
     ///
@@ -212,8 +213,9 @@ namespace {
     DenseMap<const BasicBlock*, unsigned> BBNumPreds;
   public:
     PromoteMem2Reg(const std::vector<AllocaInst*> &A, DominatorTree &dt,
-                   DominanceFrontier &df, AliasSetTracker *ast)
-      : Allocas(A), DT(dt), DF(df), AST(ast) {}
+                   DominanceFrontier &df, AliasSetTracker *ast,
+                   LLVMContext &C)
+      : Allocas(A), DT(dt), DF(df), AST(ast), Context(C) {}
 
     void run();
 
@@ -291,10 +293,9 @@ namespace {
       // As we scan the uses of the alloca instruction, keep track of stores,
       // and decide whether all of the loads and stores to the alloca are within
       // the same basic block.
-      for (Value::use_iterator U = AI->use_begin(), E = AI->use_end();
-           U != E;)  {
-        Instruction *User = cast<Instruction>(*U);
-        ++U;
+      for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
+           UI != E;)  {
+        Instruction *User = cast<Instruction>(*UI++);
         if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
           // Remove any uses of this alloca in DbgInfoInstrinsics.
           assert(BC->hasOneUse() && "Unexpected alloca uses!");
@@ -303,7 +304,8 @@ namespace {
           BC->eraseFromParent();
           continue;
         } 
-        else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+        
+        if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
           // Remember the basic blocks which define new values for the alloca
           DefiningBlocks.push_back(SI->getParent());
           AllocaPointerVal = SI->getOperand(0);
@@ -491,17 +493,14 @@ void PromoteMem2Reg::run() {
       PHINode *PN = I->second;
       
       // If this PHI node merges one value and/or undefs, get the value.
-      if (Value *V = PN->hasConstantValue(true)) {
-        if (!isa<Instruction>(V) ||
-            properlyDominates(cast<Instruction>(V), PN)) {
-          if (AST && isa<PointerType>(PN->getType()))
-            AST->deleteValue(PN);
-          PN->replaceAllUsesWith(V);
-          PN->eraseFromParent();
-          NewPhiNodes.erase(I++);
-          EliminatedAPHI = true;
-          continue;
-        }
+      if (Value *V = PN->hasConstantValue(&DT)) {
+        if (AST && isa<PointerType>(PN->getType()))
+          AST->deleteValue(PN);
+        PN->replaceAllUsesWith(V);
+        PN->eraseFromParent();
+        NewPhiNodes.erase(I++);
+        EliminatedAPHI = true;
+        continue;
       }
       ++I;
     }
@@ -603,7 +602,9 @@ ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
         LiveInBlockWorklist.pop_back();
         --i, --e;
         break;
-      } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+      }
+      
+      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
         if (LI->getOperand(0) != AI) continue;
         
         // Okay, we found a load before a store to the alloca.  It is actually
@@ -757,6 +758,7 @@ void PromoteMem2Reg::RewriteSingleStoreAlloca(AllocaInst *AI,
   }
 }
 
+namespace {
 
 /// StoreIndexSearchPredicate - This is a helper predicate used to search by the
 /// first element of a pair.
@@ -767,6 +769,8 @@ struct StoreIndexSearchPredicate {
   }
 };
 
+}
+
 /// PromoteSingleBlockAlloca - Many allocas are only used within a single basic
 /// block.  If this is the case, avoid traversing the CFG and inserting a lot of
 /// potentially useless PHI nodes by just performing a single linear pass over
@@ -864,8 +868,8 @@ bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
   // Create a PhiNode using the dereferenced type... and add the phi-node to the
   // BasicBlock.
   PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(),
-                       Allocas[AllocaNo]->getName() + "." +
-                       utostr(Version++), BB->begin());
+                       Allocas[AllocaNo]->getName() + "." + Twine(Version++), 
+                       BB->begin());
   ++NumPHIInsert;
   PhiToAllocaMap[PN] = AllocaNo;
   PN->reserveOperandSpace(getNumPreds(BB));
@@ -995,9 +999,9 @@ NextIteration:
 ///
 void llvm::PromoteMemToReg(const std::vector<AllocaInst*> &Allocas,
                            DominatorTree &DT, DominanceFrontier &DF,
-                           AliasSetTracker *AST) {
+                           LLVMContext &Context, AliasSetTracker *AST) {
   // If there is nothing to do, bail out...
   if (Allocas.empty()) return;
 
-  PromoteMem2Reg(Allocas, DT, DF, AST).run();
+  PromoteMem2Reg(Allocas, DT, DF, AST, Context).run();
 }
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
new file mode 100644
index 0000000..780ee26
--- /dev/null
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -0,0 +1,335 @@
+//===- SSAUpdater.cpp - Unstructured SSA Update Tool ----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SSAUpdater class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Instructions.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+typedef DenseMap<BasicBlock*, TrackingVH<Value> > AvailableValsTy;
+typedef std::vector<std::pair<BasicBlock*, TrackingVH<Value> > >
+                IncomingPredInfoTy;
+
+static AvailableValsTy &getAvailableVals(void *AV) {
+  return *static_cast<AvailableValsTy*>(AV);
+}
+
+static IncomingPredInfoTy &getIncomingPredInfo(void *IPI) {
+  return *static_cast<IncomingPredInfoTy*>(IPI);
+}
+
+
+SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI)
+  : AV(0), PrototypeValue(0), IPI(0), InsertedPHIs(NewPHI) {}
+
+SSAUpdater::~SSAUpdater() {
+  delete &getAvailableVals(AV);
+  delete &getIncomingPredInfo(IPI);
+}
+
+/// Initialize - Reset this object to get ready for a new set of SSA
+/// updates.  ProtoValue is the value used to name PHI nodes.
+void SSAUpdater::Initialize(Value *ProtoValue) {
+  if (AV == 0)
+    AV = new AvailableValsTy();
+  else
+    getAvailableVals(AV).clear();
+  
+  if (IPI == 0)
+    IPI = new IncomingPredInfoTy();
+  else
+    getIncomingPredInfo(IPI).clear();
+  PrototypeValue = ProtoValue;
+}
+
+/// HasValueForBlock - Return true if the SSAUpdater already has a value for
+/// the specified block.
+bool SSAUpdater::HasValueForBlock(BasicBlock *BB) const {
+  return getAvailableVals(AV).count(BB);
+}
+
+/// AddAvailableValue - Indicate that a rewritten value is available in the
+/// specified block with the specified value.
+void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) {
+  assert(PrototypeValue != 0 && "Need to initialize SSAUpdater");
+  assert(PrototypeValue->getType() == V->getType() &&
+         "All rewritten values must have the same type");
+  getAvailableVals(AV)[BB] = V;
+}
+
+/// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is
+/// live at the end of the specified block.
+Value *SSAUpdater::GetValueAtEndOfBlock(BasicBlock *BB) {
+  assert(getIncomingPredInfo(IPI).empty() && "Unexpected Internal State");
+  Value *Res = GetValueAtEndOfBlockInternal(BB);
+  assert(getIncomingPredInfo(IPI).empty() && "Unexpected Internal State");
+  return Res;
+}
+
+/// GetValueInMiddleOfBlock - Construct SSA form, materializing a value that
+/// is live in the middle of the specified block.
+///
+/// GetValueInMiddleOfBlock is the same as GetValueAtEndOfBlock except in one
+/// important case: if there is a definition of the rewritten value after the
+/// 'use' in BB.  Consider code like this:
+///
+///      X1 = ...
+///   SomeBB:
+///      use(X)
+///      X2 = ...
+///      br Cond, SomeBB, OutBB
+///
+/// In this case, there are two values (X1 and X2) added to the AvailableVals
+/// set by the client of the rewriter, and those values are both live out of
+/// their respective blocks.  However, the use of X happens in the *middle* of
+/// a block.  Because of this, we need to insert a new PHI node in SomeBB to
+/// merge the appropriate values, and this value isn't live out of the block.
+///
+Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
+  // If there is no definition of the renamed variable in this block, just use
+  // GetValueAtEndOfBlock to do our work.
+  if (!getAvailableVals(AV).count(BB))
+    return GetValueAtEndOfBlock(BB);
+  
+  // Otherwise, we have the hard case.  Get the live-in values for each
+  // predecessor.
+  SmallVector<std::pair<BasicBlock*, Value*>, 8> PredValues;
+  Value *SingularValue = 0;
+  
+  // We can get our predecessor info by walking the pred_iterator list, but it
+  // is relatively slow.  If we already have PHI nodes in this block, walk one
+  // of them to get the predecessor list instead.
+  if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) {
+    for (unsigned i = 0, e = SomePhi->getNumIncomingValues(); i != e; ++i) {
+      BasicBlock *PredBB = SomePhi->getIncomingBlock(i);
+      Value *PredVal = GetValueAtEndOfBlock(PredBB);
+      PredValues.push_back(std::make_pair(PredBB, PredVal));
+      
+      // Compute SingularValue.
+      if (i == 0)
+        SingularValue = PredVal;
+      else if (PredVal != SingularValue)
+        SingularValue = 0;
+    }
+  } else {
+    bool isFirstPred = true;
+    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+      BasicBlock *PredBB = *PI;
+      Value *PredVal = GetValueAtEndOfBlock(PredBB);
+      PredValues.push_back(std::make_pair(PredBB, PredVal));
+      
+      // Compute SingularValue.
+      if (isFirstPred) {
+        SingularValue = PredVal;
+        isFirstPred = false;
+      } else if (PredVal != SingularValue)
+        SingularValue = 0;
+    }
+  }
+  
+  // If there are no predecessors, just return undef.
+  if (PredValues.empty())
+    return UndefValue::get(PrototypeValue->getType());
+  
+  // Otherwise, if all the merged values are the same, just use it.
+  if (SingularValue != 0)
+    return SingularValue;
+  
+  // Otherwise, we do need a PHI: insert one now.
+  PHINode *InsertedPHI = PHINode::Create(PrototypeValue->getType(),
+                                         PrototypeValue->getName(),
+                                         &BB->front());
+  InsertedPHI->reserveOperandSpace(PredValues.size());
+  
+  // Fill in all the predecessors of the PHI.
+  for (unsigned i = 0, e = PredValues.size(); i != e; ++i)
+    InsertedPHI->addIncoming(PredValues[i].second, PredValues[i].first);
+  
+  // See if the PHI node can be merged to a single value.  This can happen in
+  // loop cases when we get a PHI of itself and one other value.
+  if (Value *ConstVal = InsertedPHI->hasConstantValue()) {
+    InsertedPHI->eraseFromParent();
+    return ConstVal;
+  }
+
+  // If the client wants to know about all new instructions, tell it.
+  if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
+  
+  DEBUG(errs() << "  Inserted PHI: " << *InsertedPHI << "\n");
+  return InsertedPHI;
+}
+
+/// RewriteUse - Rewrite a use of the symbolic value.  This handles PHI nodes,
+/// which use their value in the corresponding predecessor.
+void SSAUpdater::RewriteUse(Use &U) {
+  Instruction *User = cast<Instruction>(U.getUser());
+  BasicBlock *UseBB = User->getParent();
+  if (PHINode *UserPN = dyn_cast<PHINode>(User))
+    UseBB = UserPN->getIncomingBlock(U);
+  
+  U.set(GetValueInMiddleOfBlock(UseBB));
+}
+
+
+/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry
+/// for the specified BB and if so, return it.  If not, construct SSA form by
+/// walking predecessors inserting PHI nodes as needed until we get to a block
+/// where the value is available.
+///
+Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
+  AvailableValsTy &AvailableVals = getAvailableVals(AV);
+  
+  // Query AvailableVals by doing an insertion of null.
+  std::pair<AvailableValsTy::iterator, bool> InsertRes =
+  AvailableVals.insert(std::make_pair(BB, WeakVH()));
+  
+  // Handle the case when the insertion fails because we have already seen BB.
+  if (!InsertRes.second) {
+    // If the insertion failed, there are two cases.  The first case is that the
+    // value is already available for the specified block.  If we get this, just
+    // return the value.
+    if (InsertRes.first->second != 0)
+      return InsertRes.first->second;
+    
+    // Otherwise, if the value we find is null, then this is the value is not
+    // known but it is being computed elsewhere in our recursion.  This means
+    // that we have a cycle.  Handle this by inserting a PHI node and returning
+    // it.  When we get back to the first instance of the recursion we will fill
+    // in the PHI node.
+    return InsertRes.first->second =
+    PHINode::Create(PrototypeValue->getType(), PrototypeValue->getName(),
+                    &BB->front());
+  }
+  
+  // Okay, the value isn't in the map and we just inserted a null in the entry
+  // to indicate that we're processing the block.  Since we have no idea what
+  // value is in this block, we have to recurse through our predecessors.
+  //
+  // While we're walking our predecessors, we keep track of them in a vector,
+  // then insert a PHI node in the end if we actually need one.  We could use a
+  // smallvector here, but that would take a lot of stack space for every level
+  // of the recursion, just use IncomingPredInfo as an explicit stack.
+  IncomingPredInfoTy &IncomingPredInfo = getIncomingPredInfo(IPI);
+  unsigned FirstPredInfoEntry = IncomingPredInfo.size();
+  
+  // As we're walking the predecessors, keep track of whether they are all
+  // producing the same value.  If so, this value will capture it, if not, it
+  // will get reset to null.  We distinguish the no-predecessor case explicitly
+  // below.
+  TrackingVH<Value> SingularValue;
+  
+  // We can get our predecessor info by walking the pred_iterator list, but it
+  // is relatively slow.  If we already have PHI nodes in this block, walk one
+  // of them to get the predecessor list instead.
+  if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) {
+    for (unsigned i = 0, e = SomePhi->getNumIncomingValues(); i != e; ++i) {
+      BasicBlock *PredBB = SomePhi->getIncomingBlock(i);
+      Value *PredVal = GetValueAtEndOfBlockInternal(PredBB);
+      IncomingPredInfo.push_back(std::make_pair(PredBB, PredVal));
+      
+      // Compute SingularValue.
+      if (i == 0)
+        SingularValue = PredVal;
+      else if (PredVal != SingularValue)
+        SingularValue = 0;
+    }
+  } else {
+    bool isFirstPred = true;
+    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+      BasicBlock *PredBB = *PI;
+      Value *PredVal = GetValueAtEndOfBlockInternal(PredBB);
+      IncomingPredInfo.push_back(std::make_pair(PredBB, PredVal));
+      
+      // Compute SingularValue.
+      if (isFirstPred) {
+        SingularValue = PredVal;
+        isFirstPred = false;
+      } else if (PredVal != SingularValue)
+        SingularValue = 0;
+    }
+  }
+  
+  // If there are no predecessors, then we must have found an unreachable block
+  // just return 'undef'.  Since there are no predecessors, InsertRes must not
+  // be invalidated.
+  if (IncomingPredInfo.size() == FirstPredInfoEntry)
+    return InsertRes.first->second = UndefValue::get(PrototypeValue->getType());
+  
+  /// Look up BB's entry in AvailableVals.  'InsertRes' may be invalidated.  If
+  /// this block is involved in a loop, a no-entry PHI node will have been
+  /// inserted as InsertedVal.  Otherwise, we'll still have the null we inserted
+  /// above.
+  TrackingVH<Value> &InsertedVal = AvailableVals[BB];
+  
+  // If all the predecessor values are the same then we don't need to insert a
+  // PHI.  This is the simple and common case.
+  if (SingularValue) {
+    // If a PHI node got inserted, replace it with the singlar value and delete
+    // it.
+    if (InsertedVal) {
+      PHINode *OldVal = cast<PHINode>(InsertedVal);
+      // Be careful about dead loops.  These RAUW's also update InsertedVal.
+      if (InsertedVal != SingularValue)
+        OldVal->replaceAllUsesWith(SingularValue);
+      else
+        OldVal->replaceAllUsesWith(UndefValue::get(InsertedVal->getType()));
+      OldVal->eraseFromParent();
+    } else {
+      InsertedVal = SingularValue;
+    }
+    
+    // Drop the entries we added in IncomingPredInfo to restore the stack.
+    IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry,
+                           IncomingPredInfo.end());
+    return InsertedVal;
+  }
+  
+  // Otherwise, we do need a PHI: insert one now if we don't already have one.
+  if (InsertedVal == 0)
+    InsertedVal = PHINode::Create(PrototypeValue->getType(),
+                                  PrototypeValue->getName(), &BB->front());
+  
+  PHINode *InsertedPHI = cast<PHINode>(InsertedVal);
+  InsertedPHI->reserveOperandSpace(IncomingPredInfo.size()-FirstPredInfoEntry);
+  
+  // Fill in all the predecessors of the PHI.
+  for (IncomingPredInfoTy::iterator I =
+         IncomingPredInfo.begin()+FirstPredInfoEntry,
+       E = IncomingPredInfo.end(); I != E; ++I)
+    InsertedPHI->addIncoming(I->second, I->first);
+  
+  // Drop the entries we added in IncomingPredInfo to restore the stack.
+  IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry,
+                         IncomingPredInfo.end());
+  
+  // See if the PHI node can be merged to a single value.  This can happen in
+  // loop cases when we get a PHI of itself and one other value.
+  if (Value *ConstVal = InsertedPHI->hasConstantValue()) {
+    InsertedPHI->replaceAllUsesWith(ConstVal);
+    InsertedPHI->eraseFromParent();
+    InsertedVal = ConstVal;
+  } else {
+    DEBUG(errs() << "  Inserted PHI: " << *InsertedPHI << "\n");
+    
+    // If the client wants to know about all new instructions, tell it.
+    if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
+  }
+  
+  return InsertedVal;
+}
+
+
diff --git a/lib/Transforms/Utils/SSI.cpp b/lib/Transforms/Utils/SSI.cpp
index 4c4dd37..3bb2e8e 100644
--- a/lib/Transforms/Utils/SSI.cpp
+++ b/lib/Transforms/Utils/SSI.cpp
@@ -23,6 +23,7 @@
 
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/SSI.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/Dominators.h"
 
 using namespace llvm;
@@ -30,11 +31,12 @@ using namespace llvm;
 static const std::string SSI_PHI = "SSI_phi";
 static const std::string SSI_SIG = "SSI_sigma";
 
-static const unsigned UNSIGNED_INFINITE = ~0U;
+STATISTIC(NumSigmaInserted, "Number of sigma functions inserted");
+STATISTIC(NumPhiInserted, "Number of phi functions inserted");
 
 void SSI::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.addRequired<DominanceFrontier>();
-  AU.addRequired<DominatorTree>();
+  AU.addRequiredTransitive<DominanceFrontier>();
+  AU.addRequiredTransitive<DominatorTree>();
   AU.setPreservesAll();
 }
 
@@ -45,22 +47,23 @@ bool SSI::runOnFunction(Function &F) {
 
 /// This methods creates the SSI representation for the list of values
 /// received. It will only create SSI representation if a value is used
-/// in a to decide a branch. Repeated values are created only once.
+/// to decide a branch. Repeated values are created only once.
 ///
 void SSI::createSSI(SmallVectorImpl<Instruction *> &value) {
   init(value);
 
-  for (unsigned i = 0; i < num_values; ++i) {
-    if (created.insert(value[i])) {
-      needConstruction[i] = true;
-    }
-  }
-  insertSigmaFunctions(value);
+  SmallPtrSet<Instruction*, 4> needConstruction;
+  for (SmallVectorImpl<Instruction*>::iterator I = value.begin(),
+       E = value.end(); I != E; ++I)
+    if (created.insert(*I))
+      needConstruction.insert(*I);
+
+  insertSigmaFunctions(needConstruction);
 
   // Test if there is a need to transform to SSI
-  if (needConstruction.any()) {
-    insertPhiFunctions(value);
-    renameInit(value);
+  if (!needConstruction.empty()) {
+    insertPhiFunctions(needConstruction);
+    renameInit(needConstruction);
     rename(DT_->getRoot());
     fixPhis();
   }
@@ -71,100 +74,107 @@ void SSI::createSSI(SmallVectorImpl<Instruction *> &value) {
 /// Insert sigma functions (a sigma function is a phi function with one
 /// operator)
 ///
-void SSI::insertSigmaFunctions(SmallVectorImpl<Instruction *> &value) {
-  for (unsigned i = 0; i < num_values; ++i) {
-    if (!needConstruction[i])
-      continue;
-
-    bool need = false;
-    for (Value::use_iterator begin = value[i]->use_begin(), end =
-         value[i]->use_end(); begin != end; ++begin) {
+void SSI::insertSigmaFunctions(SmallPtrSet<Instruction*, 4> &value) {
+  for (SmallPtrSet<Instruction*, 4>::iterator I = value.begin(),
+       E = value.end(); I != E; ++I) {
+    for (Value::use_iterator begin = (*I)->use_begin(),
+         end = (*I)->use_end(); begin != end; ++begin) {
       // Test if the Use of the Value is in a comparator
-      CmpInst *CI = dyn_cast<CmpInst>(begin);
-      if (CI && isUsedInTerminator(CI)) {
-        // Basic Block of the Instruction
-        BasicBlock *BB = CI->getParent();
-        // Last Instruction of the Basic Block
-        const TerminatorInst *TI = BB->getTerminator();
-
-        for (unsigned j = 0, e = TI->getNumSuccessors(); j < e; ++j) {
-          // Next Basic Block
-          BasicBlock *BB_next = TI->getSuccessor(j);
-          if (BB_next != BB &&
-              BB_next->getUniquePredecessor() != NULL &&
-              dominateAny(BB_next, value[i])) {
-            PHINode *PN = PHINode::Create(
-                value[i]->getType(), SSI_SIG, BB_next->begin());
-            PN->addIncoming(value[i], BB);
-            sigmas.insert(std::make_pair(PN, i));
-            created.insert(PN);
-            need = true;
-            defsites[i].push_back(BB_next);
+      if (CmpInst *CI = dyn_cast<CmpInst>(begin)) {
+        // Iterates through all uses of CmpInst
+        for (Value::use_iterator begin_ci = CI->use_begin(),
+             end_ci = CI->use_end(); begin_ci != end_ci; ++begin_ci) {
+          // Test if any use of CmpInst is in a Terminator
+          if (TerminatorInst *TI = dyn_cast<TerminatorInst>(begin_ci)) {
+            insertSigma(TI, *I);
           }
         }
       }
     }
-    needConstruction[i] = need;
+  }
+}
+
+/// Inserts Sigma Functions in every BasicBlock successor to Terminator
+/// Instruction TI. All inserted Sigma Function are related to Instruction I.
+///
+void SSI::insertSigma(TerminatorInst *TI, Instruction *I) {
+  // Basic Block of the Terminator Instruction
+  BasicBlock *BB = TI->getParent();
+  for (unsigned i = 0, e = TI->getNumSuccessors(); i < e; ++i) {
+    // Next Basic Block
+    BasicBlock *BB_next = TI->getSuccessor(i);
+    if (BB_next != BB &&
+        BB_next->getSinglePredecessor() != NULL &&
+        dominateAny(BB_next, I)) {
+      PHINode *PN = PHINode::Create(I->getType(), SSI_SIG, BB_next->begin());
+      PN->addIncoming(I, BB);
+      sigmas[PN] = I;
+      created.insert(PN);
+      defsites[I].push_back(BB_next);
+      ++NumSigmaInserted;
+    }
   }
 }
 
 /// Insert phi functions when necessary
 ///
-void SSI::insertPhiFunctions(SmallVectorImpl<Instruction *> &value) {
+void SSI::insertPhiFunctions(SmallPtrSet<Instruction*, 4> &value) {
   DominanceFrontier *DF = &getAnalysis<DominanceFrontier>();
-  for (unsigned i = 0; i < num_values; ++i) {
+  for (SmallPtrSet<Instruction*, 4>::iterator I = value.begin(),
+       E = value.end(); I != E; ++I) {
     // Test if there were any sigmas for this variable
-    if (needConstruction[i]) {
-
-      SmallPtrSet<BasicBlock *, 1> BB_visited;
-
-      // Insert phi functions if there is any sigma function
-      while (!defsites[i].empty()) {
-
-        BasicBlock *BB = defsites[i].back();
-
-        defsites[i].pop_back();
-        DominanceFrontier::iterator DF_BB = DF->find(BB);
-
-        // Iterates through all the dominance frontier of BB
-        for (std::set<BasicBlock *>::iterator DF_BB_begin =
-             DF_BB->second.begin(), DF_BB_end = DF_BB->second.end();
-             DF_BB_begin != DF_BB_end; ++DF_BB_begin) {
-          BasicBlock *BB_dominated = *DF_BB_begin;
-
-          // Test if has not yet visited this node and if the
-          // original definition dominates this node
-          if (BB_visited.insert(BB_dominated) &&
-              DT_->properlyDominates(value_original[i], BB_dominated) &&
-              dominateAny(BB_dominated, value[i])) {
-            PHINode *PN = PHINode::Create(
-                value[i]->getType(), SSI_PHI, BB_dominated->begin());
-            phis.insert(std::make_pair(PN, i));
-            created.insert(PN);
-
-            defsites[i].push_back(BB_dominated);
-          }
+    SmallPtrSet<BasicBlock *, 16> BB_visited;
+
+    // Insert phi functions if there is any sigma function
+    while (!defsites[*I].empty()) {
+
+      BasicBlock *BB = defsites[*I].back();
+
+      defsites[*I].pop_back();
+      DominanceFrontier::iterator DF_BB = DF->find(BB);
+
+      // The BB is unreachable. Skip it.
+      if (DF_BB == DF->end())
+        continue; 
+
+      // Iterates through all the dominance frontier of BB
+      for (std::set<BasicBlock *>::iterator DF_BB_begin =
+           DF_BB->second.begin(), DF_BB_end = DF_BB->second.end();
+           DF_BB_begin != DF_BB_end; ++DF_BB_begin) {
+        BasicBlock *BB_dominated = *DF_BB_begin;
+
+        // Test if has not yet visited this node and if the
+        // original definition dominates this node
+        if (BB_visited.insert(BB_dominated) &&
+            DT_->properlyDominates(value_original[*I], BB_dominated) &&
+            dominateAny(BB_dominated, *I)) {
+          PHINode *PN = PHINode::Create(
+              (*I)->getType(), SSI_PHI, BB_dominated->begin());
+          phis.insert(std::make_pair(PN, *I));
+          created.insert(PN);
+
+          defsites[*I].push_back(BB_dominated);
+          ++NumPhiInserted;
         }
       }
-      BB_visited.clear();
     }
+    BB_visited.clear();
   }
 }
 
 /// Some initialization for the rename part
 ///
-void SSI::renameInit(SmallVectorImpl<Instruction *> &value) {
-  value_stack.resize(num_values);
-  for (unsigned i = 0; i < num_values; ++i) {
-    value_stack[i].push_back(value[i]);
-  }
+void SSI::renameInit(SmallPtrSet<Instruction*, 4> &value) {
+  for (SmallPtrSet<Instruction*, 4>::iterator I = value.begin(),
+       E = value.end(); I != E; ++I)
+    value_stack[*I].push_back(*I);
 }
 
 /// Renames all variables in the specified BasicBlock.
 /// Only variables that need to be rename will be.
 ///
 void SSI::rename(BasicBlock *BB) {
-  BitVector *defined = new BitVector(num_values, false);
+  SmallPtrSet<Instruction*, 8> defined;
 
   // Iterate through instructions and make appropriate renaming.
   // For SSI_PHI (b = PHI()), store b at value_stack as a new
@@ -178,19 +188,17 @@ void SSI::rename(BasicBlock *BB) {
        begin != end; ++begin) {
     Instruction *I = begin;
     if (PHINode *PN = dyn_cast<PHINode>(I)) { // Treat PHI functions
-      int position;
+      Instruction* position;
 
       // Treat SSI_PHI
-      if ((position = getPositionPhi(PN)) != -1) {
+      if ((position = getPositionPhi(PN))) {
         value_stack[position].push_back(PN);
-        (*defined)[position] = true;
-      }
-
+        defined.insert(position);
       // Treat SSI_SIG
-      else if ((position = getPositionSigma(PN)) != -1) {
+      } else if ((position = getPositionSigma(PN))) {
         substituteUse(I);
         value_stack[position].push_back(PN);
-        (*defined)[position] = true;
+        defined.insert(position);
       }
 
       // Treat all other PHI functions
@@ -216,10 +224,9 @@ void SSI::rename(BasicBlock *BB) {
     for (BasicBlock::iterator begin = BB_succ->begin(),
          notPhi = BB_succ->getFirstNonPHI(); begin != *notPhi; ++begin) {
       Instruction *I = begin;
-      PHINode *PN;
-      int position;
-      if ((PN = dyn_cast<PHINode>(I)) && ((position
-          = getPositionPhi(PN)) != -1)) {
+      PHINode *PN = dyn_cast<PHINode>(I);
+      Instruction* position;
+      if (PN && ((position = getPositionPhi(PN)))) {
         PN->addIncoming(value_stack[position].back(), BB);
       }
     }
@@ -237,13 +244,9 @@ void SSI::rename(BasicBlock *BB) {
 
   // Now we remove all inserted definitions of a variable from the top of
   // the stack leaving the previous one as the top.
-  if (defined->any()) {
-    for (unsigned i = 0; i < num_values; ++i) {
-      if ((*defined)[i]) {
-        value_stack[i].pop_back();
-      }
-    }
-  }
+  for (SmallPtrSet<Instruction*, 8>::iterator DI = defined.begin(),
+       DE = defined.end(); DI != DE; ++DI)
+    value_stack[*DI].pop_back();
 }
 
 /// Substitute any use in this instruction for the last definition of
@@ -252,23 +255,24 @@ void SSI::rename(BasicBlock *BB) {
 void SSI::substituteUse(Instruction *I) {
   for (unsigned i = 0, e = I->getNumOperands(); i < e; ++i) {
     Value *operand = I->getOperand(i);
-    for (unsigned j = 0; j < num_values; ++j) {
-      if (operand == value_stack[j].front() &&
-          I != value_stack[j].back()) {
+    for (DenseMap<Instruction*, SmallVector<Instruction*, 1> >::iterator
+         VI = value_stack.begin(), VE = value_stack.end(); VI != VE; ++VI) {
+      if (operand == VI->second.front() &&
+          I != VI->second.back()) {
         PHINode *PN_I = dyn_cast<PHINode>(I);
-        PHINode *PN_vs = dyn_cast<PHINode>(value_stack[j].back());
+        PHINode *PN_vs = dyn_cast<PHINode>(VI->second.back());
 
         // If a phi created in a BasicBlock is used as an operand of another
         // created in the same BasicBlock, this step marks this second phi,
         // to fix this issue later. It cannot be fixed now, because the
         // operands of the first phi are not final yet.
         if (PN_I && PN_vs &&
-            value_stack[j].back()->getParent() == I->getParent()) {
+            VI->second.back()->getParent() == I->getParent()) {
 
           phisToFix.insert(PN_I);
         }
 
-        I->setOperand(i, value_stack[j].back());
+        I->setOperand(i, VI->second.back());
         break;
       }
     }
@@ -276,12 +280,16 @@ void SSI::substituteUse(Instruction *I) {
 }
 
 /// Test if the BasicBlock BB dominates any use or definition of value.
+/// If it dominates a phi instruction that is on the same BasicBlock,
+/// that does not count.
 ///
 bool SSI::dominateAny(BasicBlock *BB, Instruction *value) {
   for (Value::use_iterator begin = value->use_begin(),
        end = value->use_end(); begin != end; ++begin) {
     Instruction *I = cast<Instruction>(*begin);
     BasicBlock *BB_father = I->getParent();
+    if (BB == BB_father && isa<PHINode>(I))
+      continue;
     if (DT_->dominates(BB, BB_father)) {
       return true;
     }
@@ -293,31 +301,54 @@ bool SSI::dominateAny(BasicBlock *BB, Instruction *value) {
 /// as an operand of another phi function used in the same BasicBlock,
 /// LLVM looks this as an error. So on the second phi, the first phi is called
 /// P and the BasicBlock it incomes is B. This P will be replaced by the value
-/// it has for BasicBlock B.
+/// it has for BasicBlock B. It also includes undef values for predecessors
+/// that were not included in the phi.
 ///
 void SSI::fixPhis() {
   for (SmallPtrSet<PHINode *, 1>::iterator begin = phisToFix.begin(),
        end = phisToFix.end(); begin != end; ++begin) {
     PHINode *PN = *begin;
     for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) {
-      PHINode *PN_father;
-      if ((PN_father = dyn_cast<PHINode>(PN->getIncomingValue(i))) &&
-          PN->getParent() == PN_father->getParent()) {
+      PHINode *PN_father = dyn_cast<PHINode>(PN->getIncomingValue(i));
+      if (PN_father && PN->getParent() == PN_father->getParent() &&
+          !DT_->dominates(PN->getParent(), PN->getIncomingBlock(i))) {
         BasicBlock *BB = PN->getIncomingBlock(i);
         int pos = PN_father->getBasicBlockIndex(BB);
         PN->setIncomingValue(i, PN_father->getIncomingValue(pos));
       }
     }
   }
+
+  for (DenseMapIterator<PHINode *, Instruction*> begin = phis.begin(),
+       end = phis.end(); begin != end; ++begin) {
+    PHINode *PN = begin->first;
+    BasicBlock *BB = PN->getParent();
+    pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+    SmallVector<BasicBlock*, 8> Preds(PI, PE);
+    for (unsigned size = Preds.size();
+         PI != PE && PN->getNumIncomingValues() != size; ++PI) {
+      bool found = false;
+      for (unsigned i = 0, pn_end = PN->getNumIncomingValues();
+           i < pn_end; ++i) {
+        if (PN->getIncomingBlock(i) == *PI) {
+          found = true;
+          break;
+        }
+      }
+      if (!found) {
+        PN->addIncoming(UndefValue::get(PN->getType()), *PI);
+      }
+    }
+  }
 }
 
 /// Return which variable (position on the vector of variables) this phi
 /// represents on the phis list.
 ///
-unsigned SSI::getPositionPhi(PHINode *PN) {
-  DenseMap<PHINode *, unsigned>::iterator val = phis.find(PN);
+Instruction* SSI::getPositionPhi(PHINode *PN) {
+  DenseMap<PHINode *, Instruction*>::iterator val = phis.find(PN);
   if (val == phis.end())
-    return UNSIGNED_INFINITE;
+    return 0;
   else
     return val->second;
 }
@@ -325,52 +356,27 @@ unsigned SSI::getPositionPhi(PHINode *PN) {
 /// Return which variable (position on the vector of variables) this phi
 /// represents on the sigmas list.
 ///
-unsigned SSI::getPositionSigma(PHINode *PN) {
-  DenseMap<PHINode *, unsigned>::iterator val = sigmas.find(PN);
+Instruction* SSI::getPositionSigma(PHINode *PN) {
+  DenseMap<PHINode *, Instruction*>::iterator val = sigmas.find(PN);
   if (val == sigmas.end())
-    return UNSIGNED_INFINITE;
+    return 0;
   else
     return val->second;
 }
 
-/// Return true if the the Comparison Instruction is an operator
-/// of the Terminator instruction of its Basic Block.
-///
-unsigned SSI::isUsedInTerminator(CmpInst *CI) {
-  TerminatorInst *TI = CI->getParent()->getTerminator();
-  if (TI->getNumOperands() == 0) {
-    return false;
-  } else if (CI == TI->getOperand(0)) {
-    return true;
-  } else {
-    return false;
-  }
-}
-
 /// Initializes
 ///
 void SSI::init(SmallVectorImpl<Instruction *> &value) {
-  num_values = value.size();
-  needConstruction.resize(num_values, false);
-
-  value_original.resize(num_values);
-  defsites.resize(num_values);
-
-  for (unsigned i = 0; i < num_values; ++i) {
-    value_original[i] = value[i]->getParent();
-    defsites[i].push_back(value_original[i]);
+  for (SmallVectorImpl<Instruction *>::iterator I = value.begin(),
+       E = value.end(); I != E; ++I) {
+    value_original[*I] = (*I)->getParent();
+    defsites[*I].push_back((*I)->getParent());
   }
 }
 
 /// Clean all used resources in this creation of SSI
 ///
 void SSI::clean() {
-  for (unsigned i = 0; i < num_values; ++i) {
-    defsites[i].clear();
-    if (i < value_stack.size())
-      value_stack[i].clear();
-  }
-
   phis.clear();
   sigmas.clear();
   phisToFix.clear();
@@ -378,7 +384,6 @@ void SSI::clean() {
   defsites.clear();
   value_stack.clear();
   value_original.clear();
-  needConstruction.clear();
 }
 
 /// createSSIPass - The public interface to this file...
@@ -388,3 +393,40 @@ FunctionPass *llvm::createSSIPass() { return new SSI(); }
 char SSI::ID = 0;
 static RegisterPass<SSI> X("ssi", "Static Single Information Construction");
 
+/// SSIEverything - A pass that runs createSSI on every non-void variable,
+/// intended for debugging.
+namespace {
+  struct VISIBILITY_HIDDEN SSIEverything : public FunctionPass {
+    static char ID; // Pass identification, replacement for typeid
+    SSIEverything() : FunctionPass(&ID) {}
+
+    bool runOnFunction(Function &F);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<SSI>();
+    }
+  };
+}
+
+bool SSIEverything::runOnFunction(Function &F) {
+  SmallVector<Instruction *, 16> Insts;
+  SSI &ssi = getAnalysis<SSI>();
+
+  if (F.isDeclaration() || F.isIntrinsic()) return false;
+
+  for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B)
+    for (BasicBlock::iterator I = B->begin(), E = B->end(); I != E; ++I)
+      if (I->getType() != Type::getVoidTy(F.getContext()))
+        Insts.push_back(I);
+
+  ssi.createSSI(Insts);
+  return true;
+}
+
+/// createSSIEverythingPass - The public interface to this file...
+///
+FunctionPass *llvm::createSSIEverythingPass() { return new SSIEverything(); }
+
+char SSIEverything::ID = 0;
+static RegisterPass<SSIEverything>
+Y("ssi-everything", "Static Single Information Construction");
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 58d4d5a..6fd7d7b 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -21,6 +21,7 @@
 #include "llvm/GlobalVariable.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/ADT/SmallVector.h"
@@ -84,19 +85,12 @@ static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
 static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
   assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!");
 
-  DOUT << "Looking to fold " << BB->getNameStart() << " into " 
-       << Succ->getNameStart() << "\n";
+  DEBUG(errs() << "Looking to fold " << BB->getName() << " into " 
+        << Succ->getName() << "\n");
   // Shortcut, if there is only a single predecessor it must be BB and merging
   // is always safe
   if (Succ->getSinglePredecessor()) return true;
 
-  typedef SmallPtrSet<Instruction*, 16> InstrSet;
-  InstrSet BBPHIs;
-
-  // Make a list of all phi nodes in BB
-  BasicBlock::iterator BBI = BB->begin();
-  while (isa<PHINode>(*BBI)) BBPHIs.insert(BBI++);
-
   // Make a list of the predecessors of BB
   typedef SmallPtrSet<BasicBlock*, 16> BlockSet;
   BlockSet BBPreds(pred_begin(BB), pred_end(BB));
@@ -126,16 +120,13 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
             PI != PE; PI++) {
         if (BBPN->getIncomingValueForBlock(*PI) 
               != PN->getIncomingValueForBlock(*PI)) {
-          DOUT << "Can't fold, phi node " << *PN->getNameStart() << " in " 
-               << Succ->getNameStart() << " is conflicting with " 
-               << BBPN->getNameStart() << " with regard to common predecessor "
-               << (*PI)->getNameStart() << "\n";
+          DEBUG(errs() << "Can't fold, phi node " << PN->getName() << " in " 
+                << Succ->getName() << " is conflicting with " 
+                << BBPN->getName() << " with regard to common predecessor "
+                << (*PI)->getName() << "\n");
           return false;
         }
       }
-      // Remove this phinode from the list of phis in BB, since it has been
-      // handled.
-      BBPHIs.erase(BBPN);
     } else {
       Value* Val = PN->getIncomingValueForBlock(BB);
       for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end();
@@ -144,33 +135,15 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
         // one for BB, in which case this phi node will not prevent the merging
         // of the block.
         if (Val != PN->getIncomingValueForBlock(*PI)) {
-          DOUT << "Can't fold, phi node " << *PN->getNameStart() << " in " 
-          << Succ->getNameStart() << " is conflicting with regard to common "
-          << "predecessor " << (*PI)->getNameStart() << "\n";
+          DEBUG(errs() << "Can't fold, phi node " << PN->getName() << " in " 
+                << Succ->getName() << " is conflicting with regard to common "
+                << "predecessor " << (*PI)->getName() << "\n");
           return false;
         }
       }
     }
   }
 
-  // If there are any other phi nodes in BB that don't have a phi node in Succ
-  // to merge with, they must be moved to Succ completely. However, for any
-  // predecessors of Succ, branches will be added to the phi node that just
-  // point to itself. So, for any common predecessors, this must not cause
-  // conflicts.
-  for (InstrSet::iterator I = BBPHIs.begin(), E = BBPHIs.end();
-        I != E; I++) {
-    PHINode *PN = cast<PHINode>(*I);
-    for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end();
-          PI != PE; PI++)
-      if (PN->getIncomingValueForBlock(*PI) != PN) {
-        DOUT << "Can't fold, phi node " << *PN->getNameStart() << " in " 
-             << BB->getNameStart() << " is conflicting with regard to common "
-             << "predecessor " << (*PI)->getNameStart() << "\n";
-        return false;
-      }
-  }
-
   return true;
 }
 
@@ -182,8 +155,36 @@ static bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
   // Check to see if merging these blocks would cause conflicts for any of the
   // phi nodes in BB or Succ. If not, we can safely merge.
   if (!CanPropagatePredecessorsForPHIs(BB, Succ)) return false;
-  
-  DOUT << "Killing Trivial BB: \n" << *BB;
+
+  // Check for cases where Succ has multiple predecessors and a PHI node in BB
+  // has uses which will not disappear when the PHI nodes are merged.  It is
+  // possible to handle such cases, but difficult: it requires checking whether
+  // BB dominates Succ, which is non-trivial to calculate in the case where
+  // Succ has multiple predecessors.  Also, it requires checking whether
+  // constructing the necessary self-referential PHI node doesn't intoduce any
+  // conflicts; this isn't too difficult, but the previous code for doing this
+  // was incorrect.
+  //
+  // Note that if this check finds a live use, BB dominates Succ, so BB is
+  // something like a loop pre-header (or rarely, a part of an irreducible CFG);
+  // folding the branch isn't profitable in that case anyway.
+  if (!Succ->getSinglePredecessor()) {
+    BasicBlock::iterator BBI = BB->begin();
+    while (isa<PHINode>(*BBI)) {
+      for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end();
+           UI != E; ++UI) {
+        if (PHINode* PN = dyn_cast<PHINode>(*UI)) {
+          if (PN->getIncomingBlock(UI) != BB)
+            return false;
+        } else {
+          return false;
+        }
+      }
+      ++BBI;
+    }
+  }
+
+  DEBUG(errs() << "Killing Trivial BB: \n" << *BB);
   
   if (isa<PHINode>(Succ->begin())) {
     // If there is more than one pred of succ, and there are PHI nodes in
@@ -217,38 +218,16 @@ static bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
     }
   }
   
-  if (isa<PHINode>(&BB->front())) {
-    SmallVector<BasicBlock*, 16>
-    OldSuccPreds(pred_begin(Succ), pred_end(Succ));
-    
-    // Move all PHI nodes in BB to Succ if they are alive, otherwise
-    // delete them.
-    while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
-      if (PN->use_empty()) {
-        // Just remove the dead phi.  This happens if Succ's PHIs were the only
-        // users of the PHI nodes.
-        PN->eraseFromParent();
-        continue;
-      }
-    
-      // The instruction is alive, so this means that BB must dominate all
-      // predecessors of Succ (Since all uses of the PN are after its
-      // definition, so in Succ or a block dominated by Succ. If a predecessor
-      // of Succ would not be dominated by BB, PN would violate the def before
-      // use SSA demand). Therefore, we can simply move the phi node to the
-      // next block.
+  while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
+    if (Succ->getSinglePredecessor()) {
+      // BB is the only predecessor of Succ, so Succ will end up with exactly
+      // the same predecessors BB had.
       Succ->getInstList().splice(Succ->begin(),
                                  BB->getInstList(), BB->begin());
-      
-      // We need to add new entries for the PHI node to account for
-      // predecessors of Succ that the PHI node does not take into
-      // account.  At this point, since we know that BB dominated succ and all
-      // of its predecessors, this means that we should any newly added
-      // incoming edges should use the PHI node itself as the value for these
-      // edges, because they are loop back edges.
-      for (unsigned i = 0, e = OldSuccPreds.size(); i != e; ++i)
-        if (OldSuccPreds[i] != BB)
-          PN->addIncoming(PN, OldSuccPreds[i]);
+    } else {
+      // We explicitly check for such uses in CanPropagatePredecessorsForPHIs.
+      assert(PN->use_empty() && "There shouldn't be any uses here!");
+      PN->eraseFromParent();
     }
   }
     
@@ -383,26 +362,15 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
       // Okay, it looks like the instruction IS in the "condition".  Check to
       // see if its a cheap instruction to unconditionally compute, and if it
       // only uses stuff defined outside of the condition.  If so, hoist it out.
+      if (!I->isSafeToSpeculativelyExecute())
+        return false;
+
       switch (I->getOpcode()) {
       default: return false;  // Cannot hoist this out safely.
       case Instruction::Load: {
-        // We can hoist loads that are non-volatile and obviously cannot trap.
-        if (cast<LoadInst>(I)->isVolatile())
-          return false;
-        // FIXME: A computation of a constant can trap!
-        if (!isa<AllocaInst>(I->getOperand(0)) &&
-            !isa<Constant>(I->getOperand(0)))
-          return false;
-        // External weak globals may have address 0, so we can't load them.
-        Value *V2 = I->getOperand(0)->getUnderlyingObject();
-        if (V2) {
-          GlobalVariable* GV = dyn_cast<GlobalVariable>(V2);
-          if (GV && GV->hasExternalWeakLinkage())
-            return false;
-        }
-        // Finally, we have to check to make sure there are no instructions
-        // before the load in its basic block, as we are going to hoist the loop
-        // out to its predecessor.
+        // We have to check to make sure there are no instructions before the
+        // load in its basic block, as we are going to hoist the loop out to
+        // its predecessor.
         BasicBlock::iterator IP = PBB->begin();
         while (isa<DbgInfoIntrinsic>(IP))
           IP++;
@@ -645,12 +613,13 @@ static bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
         assert(ThisCases.size() == 1 && "Branch can only have one case!");
         // Insert the new branch.
         Instruction *NI = BranchInst::Create(ThisDef, TI);
+        (void) NI;
 
         // Remove PHI node entries for the dead edge.
         ThisCases[0].second->removePredecessor(TI->getParent());
 
-        DOUT << "Threading pred instr: " << *Pred->getTerminator()
-             << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n";
+        DEBUG(errs() << "Threading pred instr: " << *Pred->getTerminator()
+             << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n");
 
         EraseTerminatorInstAndDCECond(TI);
         return true;
@@ -662,8 +631,8 @@ static bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
         for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
           DeadCases.insert(PredCases[i].first);
 
-        DOUT << "Threading pred instr: " << *Pred->getTerminator()
-             << "Through successor TI: " << *TI;
+        DEBUG(errs() << "Threading pred instr: " << *Pred->getTerminator()
+                     << "Through successor TI: " << *TI);
 
         for (unsigned i = SI->getNumCases()-1; i != 0; --i)
           if (DeadCases.count(SI->getCaseValue(i))) {
@@ -671,7 +640,7 @@ static bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
             SI->removeCase(i);
           }
 
-        DOUT << "Leaving: " << *TI << "\n";
+        DEBUG(errs() << "Leaving: " << *TI << "\n");
         return true;
       }
     }
@@ -712,9 +681,10 @@ static bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
 
     // Insert the new branch.
     Instruction *NI = BranchInst::Create(TheRealDest, TI);
+    (void) NI;
 
-    DOUT << "Threading pred instr: " << *Pred->getTerminator()
-         << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n";
+    DEBUG(errs() << "Threading pred instr: " << *Pred->getTerminator()
+              << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n");
 
     EraseTerminatorInstAndDCECond(TI);
     return true;
@@ -847,7 +817,8 @@ static bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI) {
           if (InfLoopBlock == 0) {
             // Insert it at the end of the function, because it's either code,
             // or it won't matter if it's hot. :)
-            InfLoopBlock = BasicBlock::Create("infloop", BB->getParent());
+            InfLoopBlock = BasicBlock::Create(BB->getContext(),
+                                              "infloop", BB->getParent());
             BranchInst::Create(InfLoopBlock, InfLoopBlock);
           }
           NewSI->setSuccessor(i, InfLoopBlock);
@@ -900,7 +871,7 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) {
   while (isa<DbgInfoIntrinsic>(I2))
     I2 = BB2_Itr++;
   if (I1->getOpcode() != I2->getOpcode() || isa<PHINode>(I1) ||
-      !I1->isIdenticalTo(I2) ||
+      !I1->isIdenticalToWhenDefined(I2) ||
       (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
     return false;
 
@@ -919,6 +890,7 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) {
     BIParent->getInstList().splice(BI, BB1->getInstList(), I1);
     if (!I2->use_empty())
       I2->replaceAllUsesWith(I1);
+    I1->intersectOptionalDataWith(I2);
     BB2->getInstList().erase(I2);
 
     I1 = BB1_Itr++;
@@ -927,7 +899,8 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) {
     I2 = BB2_Itr++;
     while (isa<DbgInfoIntrinsic>(I2))
       I2 = BB2_Itr++;
-  } while (I1->getOpcode() == I2->getOpcode() && I1->isIdenticalTo(I2));
+  } while (I1->getOpcode() == I2->getOpcode() &&
+           I1->isIdenticalToWhenDefined(I2));
 
   return true;
 
@@ -939,7 +912,7 @@ HoistTerminator:
   // Okay, it is safe to hoist the terminator.
   Instruction *NT = I1->clone();
   BIParent->getInstList().insert(BI, NT);
-  if (NT->getType() != Type::VoidTy) {
+  if (NT->getType() != Type::getVoidTy(BB1->getContext())) {
     I1->replaceAllUsesWith(NT);
     I2->replaceAllUsesWith(NT);
     NT->takeName(I1);
@@ -1197,7 +1170,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI) {
   for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
     ConstantInt *CB;
     if ((CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i))) &&
-        CB->getType() == Type::Int1Ty) {
+        CB->getType() == Type::getInt1Ty(BB->getContext())) {
       // Okay, we now know that all edges from PredBB should be revectored to
       // branch to RealDest.
       BasicBlock *PredBB = PN->getIncomingBlock(i);
@@ -1209,7 +1182,8 @@ static bool FoldCondBranchOnPHI(BranchInst *BI) {
       // difficult cases.  Instead of being smart about this, just insert a new
       // block that jumps to the destination block, effectively splitting
       // the edge we are about to create.
-      BasicBlock *EdgeBB = BasicBlock::Create(RealDest->getName()+".critedge",
+      BasicBlock *EdgeBB = BasicBlock::Create(BB->getContext(),
+                                              RealDest->getName()+".critedge",
                                               RealDest->getParent(), RealDest);
       BranchInst::Create(RealDest, EdgeBB);
       PHINode *PN;
@@ -1242,7 +1216,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI) {
           }
           
           // Check for trivial simplification.
-          if (Constant *C = ConstantFoldInstruction(N)) {
+          if (Constant *C = ConstantFoldInstruction(N, BB->getContext())) {
             TranslateMap[BBI] = C;
             delete N;   // Constant folded away, don't need actual inst
           } else {
@@ -1296,8 +1270,8 @@ static bool FoldTwoEntryPHINode(PHINode *PN) {
     if (NumPhis > 2)
       return false;
   
-  DOUT << "FOUND IF CONDITION!  " << *IfCond << "  T: "
-       << IfTrue->getName() << "  F: " << IfFalse->getName() << "\n";
+  DEBUG(errs() << "FOUND IF CONDITION!  " << *IfCond << "  T: "
+        << IfTrue->getName() << "  F: " << IfFalse->getName() << "\n");
   
   // Loop over the PHI's seeing if we can promote them all to select
   // instructions.  While we are at it, keep track of the instructions
@@ -1427,7 +1401,7 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI) {
   if (FalseRet->getNumOperands() == 0) {
     TrueSucc->removePredecessor(BI->getParent());
     FalseSucc->removePredecessor(BI->getParent());
-    ReturnInst::Create(0, BI);
+    ReturnInst::Create(BI->getContext(), 0, BI);
     EraseTerminatorInstAndDCECond(BI);
     return true;
   }
@@ -1476,12 +1450,13 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI) {
   }
 
   Value *RI = !TrueValue ?
-              ReturnInst::Create(BI) :
-              ReturnInst::Create(TrueValue, BI);
+              ReturnInst::Create(BI->getContext(), BI) :
+              ReturnInst::Create(BI->getContext(), TrueValue, BI);
+  (void) RI;
       
-  DOUT << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:"
-       << "\n  " << *BI << "NewRet = " << *RI
-       << "TRUEBLOCK: " << *TrueSucc << "FALSEBLOCK: "<< *FalseSucc;
+  DEBUG(errs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:"
+               << "\n  " << *BI << "NewRet = " << *RI
+               << "TRUEBLOCK: " << *TrueSucc << "FALSEBLOCK: "<< *FalseSucc);
       
   EraseTerminatorInstAndDCECond(BI);
 
@@ -1561,7 +1536,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
     else
       continue;
 
-    DOUT << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB;
+    DEBUG(errs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
     
     // If we need to invert the condition in the pred block to match, do so now.
     if (InvertPredCond) {
@@ -1605,7 +1580,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
 static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
   assert(PBI->isConditional() && BI->isConditional());
   BasicBlock *BB = BI->getParent();
-  
+
   // If this block ends with a branch instruction, and if there is a
   // predecessor that ends on a branch of the same condition, make 
   // this conditional branch redundant.
@@ -1616,7 +1591,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
     if (BB->getSinglePredecessor()) {
       // Turn this into a branch on constant.
       bool CondIsTrue = PBI->getSuccessor(0) == BB;
-      BI->setCondition(ConstantInt::get(Type::Int1Ty, CondIsTrue));
+      BI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()), 
+                                        CondIsTrue));
       return true;  // Nuke the branch on constant.
     }
     
@@ -1624,7 +1600,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
     // in the constant and simplify the block result.  Subsequent passes of
     // simplifycfg will thread the block.
     if (BlockIsSimpleEnoughToThreadThrough(BB)) {
-      PHINode *NewPN = PHINode::Create(Type::Int1Ty,
+      PHINode *NewPN = PHINode::Create(Type::getInt1Ty(BB->getContext()),
                                        BI->getCondition()->getName() + ".pr",
                                        BB->begin());
       // Okay, we're going to insert the PHI node.  Since PBI is not the only
@@ -1636,7 +1612,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
             PBI->getCondition() == BI->getCondition() &&
             PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
           bool CondIsTrue = PBI->getSuccessor(0) == BB;
-          NewPN->addIncoming(ConstantInt::get(Type::Int1Ty, 
+          NewPN->addIncoming(ConstantInt::get(Type::getInt1Ty(BB->getContext()), 
                                               CondIsTrue), *PI);
         } else {
           NewPN->addIncoming(BI->getCondition(), *PI);
@@ -1694,8 +1670,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
   // Finally, if everything is ok, fold the branches to logical ops.
   BasicBlock *OtherDest  = BI->getSuccessor(BIOp ^ 1);
   
-  DOUT << "FOLDING BRs:" << *PBI->getParent()
-       << "AND: " << *BI->getParent();
+  DEBUG(errs() << "FOLDING BRs:" << *PBI->getParent()
+               << "AND: " << *BI->getParent());
   
   
   // If OtherDest *is* BB, then BB is a basic block with a single conditional
@@ -1708,12 +1684,13 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
   if (OtherDest == BB) {
     // Insert it at the end of the function, because it's either code,
     // or it won't matter if it's hot. :)
-    BasicBlock *InfLoopBlock = BasicBlock::Create("infloop", BB->getParent());
+    BasicBlock *InfLoopBlock = BasicBlock::Create(BB->getContext(),
+                                                  "infloop", BB->getParent());
     BranchInst::Create(InfLoopBlock, InfLoopBlock);
     OtherDest = InfLoopBlock;
   }  
   
-  DOUT << *PBI->getParent()->getParent();
+  DEBUG(errs() << *PBI->getParent()->getParent());
   
   // BI may have other predecessors.  Because of this, we leave
   // it alone, but modify PBI.
@@ -1763,9 +1740,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
     }
   }
   
-  DOUT << "INTO: " << *PBI->getParent();
-  
-  DOUT << *PBI->getParent()->getParent();
+  DEBUG(errs() << "INTO: " << *PBI->getParent());
+  DEBUG(errs() << *PBI->getParent()->getParent());
   
   // This basic block is probably dead.  We know it has at least
   // one fewer predecessor.
@@ -1792,7 +1768,7 @@ bool llvm::SimplifyCFG(BasicBlock *BB) {
   // Remove basic blocks that have no predecessors... or that just have themself
   // as a predecessor.  These are unreachable.
   if (pred_begin(BB) == pred_end(BB) || BB->getSinglePredecessor() == BB) {
-    DOUT << "Removing BB: \n" << *BB;
+    DEBUG(errs() << "Removing BB: \n" << *BB);
     DeleteDeadBlock(BB);
     return true;
   }
@@ -1832,8 +1808,8 @@ bool llvm::SimplifyCFG(BasicBlock *BB) {
       if (!UncondBranchPreds.empty()) {
         while (!UncondBranchPreds.empty()) {
           BasicBlock *Pred = UncondBranchPreds.pop_back_val();
-          DOUT << "FOLDING: " << *BB
-               << "INTO UNCOND BRANCH PRED: " << *Pred;
+          DEBUG(errs() << "FOLDING: " << *BB
+                       << "INTO UNCOND BRANCH PRED: " << *Pred);
           Instruction *UncondBranch = Pred->getTerminator();
           // Clone the return and add it to the end of the predecessor.
           Instruction *NewRet = RI->clone();
@@ -1884,33 +1860,26 @@ bool llvm::SimplifyCFG(BasicBlock *BB) {
   } else if (isa<UnwindInst>(BB->begin())) {
     // Check to see if the first instruction in this block is just an unwind.
     // If so, replace any invoke instructions which use this as an exception
-    // destination with call instructions, and any unconditional branch
-    // predecessor with an unwind.
+    // destination with call instructions.
     //
     SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB));
     while (!Preds.empty()) {
       BasicBlock *Pred = Preds.back();
-      if (BranchInst *BI = dyn_cast<BranchInst>(Pred->getTerminator())) {
-        if (BI->isUnconditional()) {
-          Pred->getInstList().pop_back();  // nuke uncond branch
-          new UnwindInst(Pred);            // Use unwind.
-          Changed = true;
-        }
-      } else if (InvokeInst *II = dyn_cast<InvokeInst>(Pred->getTerminator()))
+      if (InvokeInst *II = dyn_cast<InvokeInst>(Pred->getTerminator()))
         if (II->getUnwindDest() == BB) {
           // Insert a new branch instruction before the invoke, because this
-          // is now a fall through...
+          // is now a fall through.
           BranchInst *BI = BranchInst::Create(II->getNormalDest(), II);
           Pred->getInstList().remove(II);   // Take out of symbol table
 
-          // Insert the call now...
+          // Insert the call now.
           SmallVector<Value*,8> Args(II->op_begin()+3, II->op_end());
           CallInst *CI = CallInst::Create(II->getCalledValue(),
                                           Args.begin(), Args.end(),
                                           II->getName(), BI);
           CI->setCallingConv(II->getCallingConv());
           CI->setAttributes(II->getAttributes());
-          // If the invoke produced a value, the Call now does instead
+          // If the invoke produced a value, the Call now does instead.
           II->replaceAllUsesWith(CI);
           delete II;
           Changed = true;
@@ -2042,7 +2011,7 @@ bool llvm::SimplifyCFG(BasicBlock *BB) {
         if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
           if (BI->isUnconditional()) {
             if (BI->getSuccessor(0) == BB) {
-              new UnreachableInst(TI);
+              new UnreachableInst(TI->getContext(), TI);
               TI->eraseFromParent();
               Changed = true;
             }
diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index 848f2b8..30cb94d 100644
--- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -66,8 +66,8 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
   } else if (UnwindingBlocks.size() == 1) {
     UnwindBlock = UnwindingBlocks.front();
   } else {
-    UnwindBlock = BasicBlock::Create("UnifiedUnwindBlock", &F);
-    new UnwindInst(UnwindBlock);
+    UnwindBlock = BasicBlock::Create(F.getContext(), "UnifiedUnwindBlock", &F);
+    new UnwindInst(F.getContext(), UnwindBlock);
 
     for (std::vector<BasicBlock*>::iterator I = UnwindingBlocks.begin(),
            E = UnwindingBlocks.end(); I != E; ++I) {
@@ -83,8 +83,9 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
   } else if (UnreachableBlocks.size() == 1) {
     UnreachableBlock = UnreachableBlocks.front();
   } else {
-    UnreachableBlock = BasicBlock::Create("UnifiedUnreachableBlock", &F);
-    new UnreachableInst(UnreachableBlock);
+    UnreachableBlock = BasicBlock::Create(F.getContext(), 
+                                          "UnifiedUnreachableBlock", &F);
+    new UnreachableInst(F.getContext(), UnreachableBlock);
 
     for (std::vector<BasicBlock*>::iterator I = UnreachableBlocks.begin(),
            E = UnreachableBlocks.end(); I != E; ++I) {
@@ -107,16 +108,17 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
   // nodes (if the function returns values), and convert all of the return
   // instructions into unconditional branches.
   //
-  BasicBlock *NewRetBlock = BasicBlock::Create("UnifiedReturnBlock", &F);
+  BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(),
+                                               "UnifiedReturnBlock", &F);
 
   PHINode *PN = 0;
-  if (F.getReturnType() == Type::VoidTy) {
-    ReturnInst::Create(NULL, NewRetBlock);
+  if (F.getReturnType() == Type::getVoidTy(F.getContext())) {
+    ReturnInst::Create(F.getContext(), NULL, NewRetBlock);
   } else {
     // If the function doesn't return void... add a PHI node to the block...
     PN = PHINode::Create(F.getReturnType(), "UnifiedRetVal");
     NewRetBlock->getInstList().push_back(PN);
-    ReturnInst::Create(PN, NewRetBlock);
+    ReturnInst::Create(F.getContext(), PN, NewRetBlock);
   }
 
   // Loop over all of the blocks, replacing the return instruction with an
diff --git a/lib/Transforms/Utils/UnrollLoop.cpp b/lib/Transforms/Utils/UnrollLoop.cpp
index caef7ec..4d838b5 100644
--- a/lib/Transforms/Utils/UnrollLoop.cpp
+++ b/lib/Transforms/Utils/UnrollLoop.cpp
@@ -25,6 +25,7 @@
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/Local.h"
@@ -62,7 +63,7 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) {
   if (OnlyPred->getTerminator()->getNumSuccessors() != 1)
     return 0;
 
-  DOUT << "Merging: " << *BB << "into: " << *OnlyPred;
+  DEBUG(errs() << "Merging: " << *BB << "into: " << *OnlyPred);
 
   // Resolve any PHI nodes at the start of the block.  They are all
   // guaranteed to have exactly one entry if they exist, unless there are
@@ -113,7 +114,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)
   
   if (!BI || BI->isUnconditional()) {
     // The loop-rotate pass can be helpful to avoid this in many cases.
-    DOUT << "  Can't unroll; loop not terminated by a conditional branch.\n";
+    DEBUG(errs() <<
+             "  Can't unroll; loop not terminated by a conditional branch.\n");
     return false;
   }
 
@@ -125,9 +127,9 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)
     TripMultiple = L->getSmallConstantTripMultiple();
 
   if (TripCount != 0)
-    DOUT << "  Trip Count = " << TripCount << "\n";
+    DEBUG(errs() << "  Trip Count = " << TripCount << "\n");
   if (TripMultiple != 1)
-    DOUT << "  Trip Multiple = " << TripMultiple << "\n";
+    DEBUG(errs() << "  Trip Multiple = " << TripMultiple << "\n");
 
   // Effectively "DCE" unrolled iterations that are beyond the tripcount
   // and will never be executed.
@@ -153,17 +155,17 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)
   }
 
   if (CompletelyUnroll) {
-    DOUT << "COMPLETELY UNROLLING loop %" << Header->getName()
-         << " with trip count " << TripCount << "!\n";
+    DEBUG(errs() << "COMPLETELY UNROLLING loop %" << Header->getName()
+          << " with trip count " << TripCount << "!\n");
   } else {
-    DOUT << "UNROLLING loop %" << Header->getName()
-         << " by " << Count;
+    DEBUG(errs() << "UNROLLING loop %" << Header->getName()
+          << " by " << Count);
     if (TripMultiple == 0 || BreakoutTrip != TripMultiple) {
-      DOUT << " with a breakout at trip " << BreakoutTrip;
+      DEBUG(errs() << " with a breakout at trip " << BreakoutTrip);
     } else if (TripMultiple != 1) {
-      DOUT << " with " << TripMultiple << " trips per branch";
+      DEBUG(errs() << " with " << TripMultiple << " trips per branch");
     }
-    DOUT << "!\n";
+    DEBUG(errs() << "!\n");
   }
 
   std::vector<BasicBlock*> LoopBlocks = L->getBlocks();
@@ -349,7 +351,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)
 
       if (isInstructionTriviallyDead(Inst))
         (*BB)->getInstList().erase(Inst);
-      else if (Constant *C = ConstantFoldInstruction(Inst)) {
+      else if (Constant *C = ConstantFoldInstruction(Inst, 
+                                                     Header->getContext())) {
         Inst->replaceAllUsesWith(C);
         (*BB)->getInstList().erase(Inst);
       }
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index 20b676d..2d8332f 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -13,23 +13,27 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/ValueMapper.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/DerivedTypes.h"  // For getNullValue(Type::Int32Ty)
 #include "llvm/Constants.h"
 #include "llvm/GlobalValue.h"
 #include "llvm/Instruction.h"
-#include "llvm/MDNode.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Metadata.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/ErrorHandling.h"
 using namespace llvm;
 
-Value *llvm::MapValue(const Value *V, ValueMapTy &VM) {
+Value *llvm::MapValue(const Value *V, ValueMapTy &VM, LLVMContext &Context) {
   Value *&VMSlot = VM[V];
   if (VMSlot) return VMSlot;      // Does it exist in the map yet?
   
   // NOTE: VMSlot can be invalidated by any reference to VM, which can grow the
   // DenseMap.  This includes any recursive calls to MapValue.
 
-  // Global values do not need to be seeded into the ValueMap if they are using
-  // the identity mapping.
-  if (isa<GlobalValue>(V) || isa<InlineAsm>(V))
+  // Global values and metadata do not need to be seeded into the ValueMap if 
+  // they are using the identity mapping.
+  if (isa<GlobalValue>(V) || isa<InlineAsm>(V) || isa<MetadataBase>(V))
     return VMSlot = const_cast<Value*>(V);
 
   if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V))) {
@@ -40,7 +44,7 @@ Value *llvm::MapValue(const Value *V, ValueMapTy &VM) {
     else if (ConstantArray *CA = dyn_cast<ConstantArray>(C)) {
       for (User::op_iterator b = CA->op_begin(), i = b, e = CA->op_end();
            i != e; ++i) {
-        Value *MV = MapValue(*i, VM);
+        Value *MV = MapValue(*i, VM, Context);
         if (MV != *i) {
           // This array must contain a reference to a global, make a new array
           // and return it.
@@ -51,7 +55,7 @@ Value *llvm::MapValue(const Value *V, ValueMapTy &VM) {
             Values.push_back(cast<Constant>(*j));
           Values.push_back(cast<Constant>(MV));
           for (++i; i != e; ++i)
-            Values.push_back(cast<Constant>(MapValue(*i, VM)));
+            Values.push_back(cast<Constant>(MapValue(*i, VM, Context)));
           return VM[V] = ConstantArray::get(CA->getType(), Values);
         }
       }
@@ -60,7 +64,7 @@ Value *llvm::MapValue(const Value *V, ValueMapTy &VM) {
     } else if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) {
       for (User::op_iterator b = CS->op_begin(), i = b, e = CS->op_end();
            i != e; ++i) {
-        Value *MV = MapValue(*i, VM);
+        Value *MV = MapValue(*i, VM, Context);
         if (MV != *i) {
           // This struct must contain a reference to a global, make a new struct
           // and return it.
@@ -71,7 +75,7 @@ Value *llvm::MapValue(const Value *V, ValueMapTy &VM) {
             Values.push_back(cast<Constant>(*j));
           Values.push_back(cast<Constant>(MV));
           for (++i; i != e; ++i)
-            Values.push_back(cast<Constant>(MapValue(*i, VM)));
+            Values.push_back(cast<Constant>(MapValue(*i, VM, Context)));
           return VM[V] = ConstantStruct::get(CS->getType(), Values);
         }
       }
@@ -80,12 +84,12 @@ Value *llvm::MapValue(const Value *V, ValueMapTy &VM) {
     } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
       std::vector<Constant*> Ops;
       for (User::op_iterator i = CE->op_begin(), e = CE->op_end(); i != e; ++i)
-        Ops.push_back(cast<Constant>(MapValue(*i, VM)));
+        Ops.push_back(cast<Constant>(MapValue(*i, VM, Context)));
       return VM[V] = CE->getWithOperands(Ops);
     } else if (ConstantVector *CP = dyn_cast<ConstantVector>(C)) {
       for (User::op_iterator b = CP->op_begin(), i = b, e = CP->op_end();
            i != e; ++i) {
-        Value *MV = MapValue(*i, VM);
+        Value *MV = MapValue(*i, VM, Context);
         if (MV != *i) {
           // This vector value must contain a reference to a global, make a new
           // vector constant and return it.
@@ -96,38 +100,16 @@ Value *llvm::MapValue(const Value *V, ValueMapTy &VM) {
             Values.push_back(cast<Constant>(*j));
           Values.push_back(cast<Constant>(MV));
           for (++i; i != e; ++i)
-            Values.push_back(cast<Constant>(MapValue(*i, VM)));
+            Values.push_back(cast<Constant>(MapValue(*i, VM, Context)));
           return VM[V] = ConstantVector::get(Values);
         }
       }
       return VM[V] = C;
       
-    } else if (MDNode *N = dyn_cast<MDNode>(C)) {
-      for (MDNode::const_elem_iterator b = N->elem_begin(), i = b,
-             e = N->elem_end(); i != e; ++i) {
-        if (!*i) continue;
-
-        Value *MV = MapValue(*i, VM);
-        if (MV != *i) {
-          // This MDNode must contain a reference to a global, make a new MDNode
-          // and return it.
-	  SmallVector<Value*, 8> Values;
-          Values.reserve(N->getNumElements());
-          for (MDNode::const_elem_iterator j = b; j != i; ++j)
-            Values.push_back(*j);
-          Values.push_back(MV);
-          for (++i; i != e; ++i)
-            Values.push_back(MapValue(*i, VM));
-          return VM[V] = MDNode::get(Values.data(), Values.size());
-        }
-      }
-      return VM[V] = C;
-
     } else {
-      assert(0 && "Unknown type of constant!");
+      llvm_unreachable("Unknown type of constant!");
     }
   }
-
   return 0;
 }
 
@@ -136,7 +118,7 @@ Value *llvm::MapValue(const Value *V, ValueMapTy &VM) {
 ///
 void llvm::RemapInstruction(Instruction *I, ValueMapTy &ValueMap) {
   for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) {
-    Value *V = MapValue(*op, ValueMap);
+    Value *V = MapValue(*op, ValueMap, I->getParent()->getContext());
     assert(V && "Referenced value not in value map!");
     *op = V;
   }
-- 
cgit v1.1