Diffstat (limited to 'lib/Transforms')
-rw-r--r--  lib/Transforms/Hello/Hello.cpp                  |  10
-rw-r--r--  lib/Transforms/IPO/CMakeLists.txt               |   1
-rw-r--r--  lib/Transforms/IPO/FunctionAttrs.cpp            |   5
-rw-r--r--  lib/Transforms/IPO/GlobalDCE.cpp                |   2
-rw-r--r--  lib/Transforms/IPO/GlobalOpt.cpp                | 505
-rw-r--r--  lib/Transforms/IPO/RaiseAllocations.cpp         | 118
-rw-r--r--  lib/Transforms/IPO/StripSymbols.cpp             |   2
-rw-r--r--  lib/Transforms/Scalar/CodeGenPrepare.cpp        |  42
-rw-r--r--  lib/Transforms/Scalar/GVN.cpp                   | 481
-rw-r--r--  lib/Transforms/Scalar/InstructionCombining.cpp  | 218
-rw-r--r--  lib/Transforms/Scalar/LoopUnswitch.cpp          |  63
-rw-r--r--  lib/Transforms/Scalar/Reassociate.cpp           |   1
-rw-r--r--  lib/Transforms/Scalar/SCCP.cpp                  |   6
-rw-r--r--  lib/Transforms/Utils/BasicBlockUtils.cpp        |  28
-rw-r--r--  lib/Transforms/Utils/InlineFunction.cpp         |  14
-rw-r--r--  lib/Transforms/Utils/InstructionNamer.cpp       |   4
-rw-r--r--  lib/Transforms/Utils/LowerAllocations.cpp       |  39
-rw-r--r--  lib/Transforms/Utils/SSAUpdater.cpp             |  73
18 files changed, 538 insertions(+), 1074 deletions(-)
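
Note: the common thread across the hunks below is the removal of the first-class MallocInst IR in favor of plain calls to malloc, created with CallInst::CreateMalloc and recognized by the helpers that appear throughout the diff (isMalloc, extractMallocCall, extractMallocCallFromBitCast, getMallocAllocatedType, getMallocArraySize). A minimal sketch of the new idiom, not code from this commit; it assumes the LLVM 2.6-era APIs exactly as used in the hunks, and the header name for the malloc helpers is an assumption:

    // Sketch only, under the assumptions stated above.
    #include "llvm/Instructions.h"
    #include "llvm/LLVMContext.h"
    #include "llvm/Target/TargetData.h"
    #include "llvm/Analysis/MallocHelper.h"  // assumed home of the malloc helpers
    using namespace llvm;

    // Emit malloc(sizeof(ElemTy) * NElems) before InsertPt, bitcast to ElemTy*,
    // mirroring the CallInst::CreateMalloc calls in GlobalOpt.cpp below.
    static Value *emitMalloc(Instruction *InsertPt, const Type *ElemTy,
                             Value *NElems, TargetData *TD, LLVMContext &Ctx) {
      return CallInst::CreateMalloc(InsertPt, TD->getIntPtrType(Ctx),
                                    ElemTy, NElems, "newmalloc");
    }

    // Recognize a malloc call (possibly through a bitcast) whose array size is
    // computable -- the replacement for the old isa<MallocInst> checks.
    static bool isRecognizedMalloc(Value *V, LLVMContext &Ctx, TargetData *TD) {
      if (CallInst *CI = extractMallocCall(V))
        return getMallocArraySize(CI, Ctx, TD) != 0;
      return false;
    }

The guard on getMallocArraySize reflects the pattern the diff adds in GlobalOpt.cpp: transformations bail out when the array size of the malloc cannot be determined.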
diff --git a/lib/Transforms/Hello/Hello.cpp b/lib/Transforms/Hello/Hello.cpp index 8000d0d..91534a7 100644 --- a/lib/Transforms/Hello/Hello.cpp +++ b/lib/Transforms/Hello/Hello.cpp @@ -30,9 +30,8 @@ namespace { virtual bool runOnFunction(Function &F) { HelloCounter++; - std::string fname = F.getName(); - EscapeString(fname); - errs() << "Hello: " << fname << "\n"; + errs() << "Hello: "; + errs().write_escaped(F.getName()) << '\n'; return false; } }; @@ -49,9 +48,8 @@ namespace { virtual bool runOnFunction(Function &F) { HelloCounter++; - std::string fname = F.getName(); - EscapeString(fname); - errs() << "Hello: " << fname << "\n"; + errs() << "Hello: "; + errs().write_escaped(F.getName()) << '\n'; return false; } diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt index ec0f1e1..5c28801 100644 --- a/lib/Transforms/IPO/CMakeLists.txt +++ b/lib/Transforms/IPO/CMakeLists.txt @@ -9,7 +9,6 @@ add_llvm_library(LLVMipo GlobalOpt.cpp IPConstantPropagation.cpp IPO.cpp - IndMemRemoval.cpp InlineAlways.cpp InlineSimple.cpp Inliner.cpp diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index 7edaa7f..0701b94 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -153,7 +153,7 @@ bool FunctionAttrs::AddReadAttrs(const std::vector<CallGraphNode *> &SCC) { // Writes memory. Just give up. return false; - if (isa<MallocInst>(I)) + if (isMalloc(I)) // malloc claims not to write memory! PR3754. return false; @@ -267,11 +267,8 @@ bool FunctionAttrs::IsFunctionMallocLike(Function *F, // Check whether the pointer came from an allocation. case Instruction::Alloca: - case Instruction::Malloc: break; case Instruction::Call: - if (isMalloc(RVI)) - break; case Instruction::Invoke: { CallSite CS(RVI); if (CS.paramHasAttr(0, Attribute::NoAlias)) diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp index 09f9e7c..8f4e8b3 100644 --- a/lib/Transforms/IPO/GlobalDCE.cpp +++ b/lib/Transforms/IPO/GlobalDCE.cpp @@ -149,8 +149,6 @@ bool GlobalDCE::runOnModule(Module &M) { // Make sure that all memory is released AliveGlobals.clear(); - // Remove dead metadata. - Changed |= M.getContext().RemoveDeadMetadata(); return Changed; } diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index a44386e..9ced2e8 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -822,140 +822,18 @@ static void ConstantPropUsersOf(Value *V, LLVMContext &Context) { /// malloc, there is no reason to actually DO the malloc. Instead, turn the /// malloc into a global, and any loads of GV as uses of the new global. static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, - MallocInst *MI, - LLVMContext &Context) { - DEBUG(errs() << "PROMOTING MALLOC GLOBAL: " << *GV << " MALLOC = " << *MI); - ConstantInt *NElements = cast<ConstantInt>(MI->getArraySize()); - - if (NElements->getZExtValue() != 1) { - // If we have an array allocation, transform it to a single element - // allocation to make the code below simpler. 
- Type *NewTy = ArrayType::get(MI->getAllocatedType(), - NElements->getZExtValue()); - MallocInst *NewMI = - new MallocInst(NewTy, Constant::getNullValue(Type::getInt32Ty(Context)), - MI->getAlignment(), MI->getName(), MI); - Value* Indices[2]; - Indices[0] = Indices[1] = Constant::getNullValue(Type::getInt32Ty(Context)); - Value *NewGEP = GetElementPtrInst::Create(NewMI, Indices, Indices + 2, - NewMI->getName()+".el0", MI); - MI->replaceAllUsesWith(NewGEP); - MI->eraseFromParent(); - MI = NewMI; - } - - // Create the new global variable. The contents of the malloc'd memory is - // undefined, so initialize with an undef value. - // FIXME: This new global should have the alignment returned by malloc. Code - // could depend on malloc returning large alignment (on the mac, 16 bytes) but - // this would only guarantee some lower alignment. - Constant *Init = UndefValue::get(MI->getAllocatedType()); - GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(), - MI->getAllocatedType(), false, - GlobalValue::InternalLinkage, Init, - GV->getName()+".body", - GV, - GV->isThreadLocal()); - - // Anything that used the malloc now uses the global directly. - MI->replaceAllUsesWith(NewGV); - - Constant *RepValue = NewGV; - if (NewGV->getType() != GV->getType()->getElementType()) - RepValue = ConstantExpr::getBitCast(RepValue, - GV->getType()->getElementType()); - - // If there is a comparison against null, we will insert a global bool to - // keep track of whether the global was initialized yet or not. - GlobalVariable *InitBool = - new GlobalVariable(Context, Type::getInt1Ty(Context), false, - GlobalValue::InternalLinkage, - ConstantInt::getFalse(Context), GV->getName()+".init", - GV->isThreadLocal()); - bool InitBoolUsed = false; - - // Loop over all uses of GV, processing them in turn. - std::vector<StoreInst*> Stores; - while (!GV->use_empty()) - if (LoadInst *LI = dyn_cast<LoadInst>(GV->use_back())) { - while (!LI->use_empty()) { - Use &LoadUse = LI->use_begin().getUse(); - if (!isa<ICmpInst>(LoadUse.getUser())) - LoadUse = RepValue; - else { - ICmpInst *CI = cast<ICmpInst>(LoadUse.getUser()); - // Replace the cmp X, 0 with a use of the bool value. - Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", CI); - InitBoolUsed = true; - switch (CI->getPredicate()) { - default: llvm_unreachable("Unknown ICmp Predicate!"); - case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_SLT: - LV = ConstantInt::getFalse(Context); // X < null -> always false - break; - case ICmpInst::ICMP_ULE: - case ICmpInst::ICMP_SLE: - case ICmpInst::ICMP_EQ: - LV = BinaryOperator::CreateNot(LV, "notinit", CI); - break; - case ICmpInst::ICMP_NE: - case ICmpInst::ICMP_UGE: - case ICmpInst::ICMP_SGE: - case ICmpInst::ICMP_UGT: - case ICmpInst::ICMP_SGT: - break; // no change. - } - CI->replaceAllUsesWith(LV); - CI->eraseFromParent(); - } - } - LI->eraseFromParent(); - } else { - StoreInst *SI = cast<StoreInst>(GV->use_back()); - // The global is initialized when the store to it occurs. - new StoreInst(ConstantInt::getTrue(Context), InitBool, SI); - SI->eraseFromParent(); - } - - // If the initialization boolean was used, insert it, otherwise delete it. - if (!InitBoolUsed) { - while (!InitBool->use_empty()) // Delete initializations - cast<Instruction>(InitBool->use_back())->eraseFromParent(); - delete InitBool; - } else - GV->getParent()->getGlobalList().insert(GV, InitBool); - - - // Now the GV is dead, nuke it and the malloc. 
- GV->eraseFromParent(); - MI->eraseFromParent(); - - // To further other optimizations, loop over all users of NewGV and try to - // constant prop them. This will promote GEP instructions with constant - // indices into GEP constant-exprs, which will allow global-opt to hack on it. - ConstantPropUsersOf(NewGV, Context); - if (RepValue != NewGV) - ConstantPropUsersOf(RepValue, Context); - - return NewGV; -} - -/// OptimizeGlobalAddressOfMalloc - This function takes the specified global -/// variable, and transforms the program as if it always contained the result of -/// the specified malloc. Because it is always the result of the specified -/// malloc, there is no reason to actually DO the malloc. Instead, turn the -/// malloc into a global, and any loads of GV as uses of the new global. -static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, BitCastInst *BCI, LLVMContext &Context, TargetData* TD) { + DEBUG(errs() << "PROMOTING MALLOC GLOBAL: " << *GV + << " CALL = " << *CI << " BCI = " << *BCI << '\n'); + const Type *IntPtrTy = TD->getIntPtrType(Context); - DEBUG(errs() << "PROMOTING MALLOC GLOBAL: " << *GV << " MALLOC = " << *CI); - - ConstantInt *NElements = cast<ConstantInt>(getMallocArraySize(CI, - Context, TD)); + Value* ArraySize = getMallocArraySize(CI, Context, TD); + assert(ArraySize && "not a malloc whose array size can be determined"); + ConstantInt *NElements = cast<ConstantInt>(ArraySize); if (NElements->getZExtValue() != 1) { // If we have an array allocation, transform it to a single element // allocation to make the code below simpler. @@ -976,9 +854,6 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, // Create the new global variable. The contents of the malloc'd memory is // undefined, so initialize with an undef value. - // FIXME: This new global should have the alignment returned by malloc. Code - // could depend on malloc returning large alignment (on the mac, 16 bytes) but - // this would only guarantee some lower alignment. const Type *MAT = getMallocAllocatedType(CI); Constant *Init = UndefValue::get(MAT); GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(), @@ -1398,185 +1273,6 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, } } -/// PerformHeapAllocSRoA - MI is an allocation of an array of structures. Break -/// it up into multiple allocations of arrays of the fields. -static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI, - LLVMContext &Context){ - DEBUG(errs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *MI); - const StructType *STy = cast<StructType>(MI->getAllocatedType()); - - // There is guaranteed to be at least one use of the malloc (storing - // it into GV). If there are other uses, change them to be uses of - // the global to simplify later code. This also deletes the store - // into GV. - ReplaceUsesOfMallocWithGlobal(MI, GV); - - // Okay, at this point, there are no users of the malloc. Insert N - // new mallocs at the same place as MI, and N globals. 
- std::vector<Value*> FieldGlobals; - std::vector<MallocInst*> FieldMallocs; - - for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){ - const Type *FieldTy = STy->getElementType(FieldNo); - const Type *PFieldTy = PointerType::getUnqual(FieldTy); - - GlobalVariable *NGV = - new GlobalVariable(*GV->getParent(), - PFieldTy, false, GlobalValue::InternalLinkage, - Constant::getNullValue(PFieldTy), - GV->getName() + ".f" + Twine(FieldNo), GV, - GV->isThreadLocal()); - FieldGlobals.push_back(NGV); - - MallocInst *NMI = new MallocInst(FieldTy, MI->getArraySize(), - MI->getName() + ".f" + Twine(FieldNo), MI); - FieldMallocs.push_back(NMI); - new StoreInst(NMI, NGV, MI); - } - - // The tricky aspect of this transformation is handling the case when malloc - // fails. In the original code, malloc failing would set the result pointer - // of malloc to null. In this case, some mallocs could succeed and others - // could fail. As such, we emit code that looks like this: - // F0 = malloc(field0) - // F1 = malloc(field1) - // F2 = malloc(field2) - // if (F0 == 0 || F1 == 0 || F2 == 0) { - // if (F0) { free(F0); F0 = 0; } - // if (F1) { free(F1); F1 = 0; } - // if (F2) { free(F2); F2 = 0; } - // } - Value *RunningOr = 0; - for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) { - Value *Cond = new ICmpInst(MI, ICmpInst::ICMP_EQ, FieldMallocs[i], - Constant::getNullValue(FieldMallocs[i]->getType()), - "isnull"); - if (!RunningOr) - RunningOr = Cond; // First seteq - else - RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", MI); - } - - // Split the basic block at the old malloc. - BasicBlock *OrigBB = MI->getParent(); - BasicBlock *ContBB = OrigBB->splitBasicBlock(MI, "malloc_cont"); - - // Create the block to check the first condition. Put all these blocks at the - // end of the function as they are unlikely to be executed. - BasicBlock *NullPtrBlock = BasicBlock::Create(Context, "malloc_ret_null", - OrigBB->getParent()); - - // Remove the uncond branch from OrigBB to ContBB, turning it into a cond - // branch on RunningOr. - OrigBB->getTerminator()->eraseFromParent(); - BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB); - - // Within the NullPtrBlock, we need to emit a comparison and branch for each - // pointer, because some may be null while others are not. - for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { - Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock); - Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal, - Constant::getNullValue(GVVal->getType()), - "tmp"); - BasicBlock *FreeBlock = BasicBlock::Create(Context, "free_it", - OrigBB->getParent()); - BasicBlock *NextBlock = BasicBlock::Create(Context, "next", - OrigBB->getParent()); - BranchInst::Create(FreeBlock, NextBlock, Cmp, NullPtrBlock); - - // Fill in FreeBlock. - new FreeInst(GVVal, FreeBlock); - new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i], - FreeBlock); - BranchInst::Create(NextBlock, FreeBlock); - - NullPtrBlock = NextBlock; - } - - BranchInst::Create(ContBB, NullPtrBlock); - - // MI is no longer needed, remove it. - MI->eraseFromParent(); - - /// InsertedScalarizedLoads - As we process loads, if we can't immediately - /// update all uses of the load, keep track of what scalarized loads are - /// inserted for a given load. 
- DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues; - InsertedScalarizedValues[GV] = FieldGlobals; - - std::vector<std::pair<PHINode*, unsigned> > PHIsToRewrite; - - // Okay, the malloc site is completely handled. All of the uses of GV are now - // loads, and all uses of those loads are simple. Rewrite them to use loads - // of the per-field globals instead. - for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;) { - Instruction *User = cast<Instruction>(*UI++); - - if (LoadInst *LI = dyn_cast<LoadInst>(User)) { - RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite, - Context); - continue; - } - - // Must be a store of null. - StoreInst *SI = cast<StoreInst>(User); - assert(isa<ConstantPointerNull>(SI->getOperand(0)) && - "Unexpected heap-sra user!"); - - // Insert a store of null into each global. - for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { - const PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType()); - Constant *Null = Constant::getNullValue(PT->getElementType()); - new StoreInst(Null, FieldGlobals[i], SI); - } - // Erase the original store. - SI->eraseFromParent(); - } - - // While we have PHIs that are interesting to rewrite, do it. - while (!PHIsToRewrite.empty()) { - PHINode *PN = PHIsToRewrite.back().first; - unsigned FieldNo = PHIsToRewrite.back().second; - PHIsToRewrite.pop_back(); - PHINode *FieldPN = cast<PHINode>(InsertedScalarizedValues[PN][FieldNo]); - assert(FieldPN->getNumIncomingValues() == 0 &&"Already processed this phi"); - - // Add all the incoming values. This can materialize more phis. - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - Value *InVal = PN->getIncomingValue(i); - InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues, - PHIsToRewrite, Context); - FieldPN->addIncoming(InVal, PN->getIncomingBlock(i)); - } - } - - // Drop all inter-phi links and any loads that made it this far. - for (DenseMap<Value*, std::vector<Value*> >::iterator - I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end(); - I != E; ++I) { - if (PHINode *PN = dyn_cast<PHINode>(I->first)) - PN->dropAllReferences(); - else if (LoadInst *LI = dyn_cast<LoadInst>(I->first)) - LI->dropAllReferences(); - } - - // Delete all the phis and loads now that inter-references are dead. - for (DenseMap<Value*, std::vector<Value*> >::iterator - I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end(); - I != E; ++I) { - if (PHINode *PN = dyn_cast<PHINode>(I->first)) - PN->eraseFromParent(); - else if (LoadInst *LI = dyn_cast<LoadInst>(I->first)) - LI->eraseFromParent(); - } - - // The old global is now dead, remove it. - GV->eraseFromParent(); - - ++NumHeapSRA; - return cast<GlobalVariable>(FieldGlobals[0]); -} - /// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break /// it up into multiple allocations of arrays of the fields. static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, @@ -1587,6 +1283,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, << " BITCAST = " << *BCI << '\n'); const Type* MAT = getMallocAllocatedType(CI); const StructType *STy = cast<StructType>(MAT); + Value* ArraySize = getMallocArraySize(CI, Context, TD); + assert(ArraySize && "not a malloc whose array size can be determined"); // There is guaranteed to be at least one use of the malloc (storing // it into GV). 
If there are other uses, change them to be uses of @@ -1611,8 +1309,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, GV->isThreadLocal()); FieldGlobals.push_back(NGV); - Value *NMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context), FieldTy, - getMallocArraySize(CI, Context, TD), + Value *NMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context), + FieldTy, ArraySize, BCI->getName() + ".f" + Twine(FieldNo)); FieldMallocs.push_back(NMI); new StoreInst(NMI, NGV, BCI); @@ -1766,95 +1464,6 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, /// pointer global variable with a single value stored it that is a malloc or /// cast of malloc. static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, - MallocInst *MI, - Module::global_iterator &GVI, - TargetData *TD, - LLVMContext &Context) { - // If this is a malloc of an abstract type, don't touch it. - if (!MI->getAllocatedType()->isSized()) - return false; - - // We can't optimize this global unless all uses of it are *known* to be - // of the malloc value, not of the null initializer value (consider a use - // that compares the global's value against zero to see if the malloc has - // been reached). To do this, we check to see if all uses of the global - // would trap if the global were null: this proves that they must all - // happen after the malloc. - if (!AllUsesOfLoadedValueWillTrapIfNull(GV)) - return false; - - // We can't optimize this if the malloc itself is used in a complex way, - // for example, being stored into multiple globals. This allows the - // malloc to be stored into the specified global, loaded setcc'd, and - // GEP'd. These are all things we could transform to using the global - // for. - { - SmallPtrSet<PHINode*, 8> PHIs; - if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(MI, GV, PHIs)) - return false; - } - - - // If we have a global that is only initialized with a fixed size malloc, - // transform the program to use global memory instead of malloc'd memory. - // This eliminates dynamic allocation, avoids an indirection accessing the - // data, and exposes the resultant global to further GlobalOpt. - if (ConstantInt *NElements = dyn_cast<ConstantInt>(MI->getArraySize())) { - // Restrict this transformation to only working on small allocations - // (2048 bytes currently), as we don't want to introduce a 16M global or - // something. - if (TD && - NElements->getZExtValue()* - TD->getTypeAllocSize(MI->getAllocatedType()) < 2048) { - GVI = OptimizeGlobalAddressOfMalloc(GV, MI, Context); - return true; - } - } - - // If the allocation is an array of structures, consider transforming this - // into multiple malloc'd arrays, one for each field. This is basically - // SRoA for malloc'd memory. - const Type *AllocTy = MI->getAllocatedType(); - - // If this is an allocation of a fixed size array of structs, analyze as a - // variable size array. malloc [100 x struct],1 -> malloc struct, 100 - if (!MI->isArrayAllocation()) - if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy)) - AllocTy = AT->getElementType(); - - if (const StructType *AllocSTy = dyn_cast<StructType>(AllocTy)) { - // This the structure has an unreasonable number of fields, leave it - // alone. - if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 && - AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, MI)) { - - // If this is a fixed size array, transform the Malloc to be an alloc of - // structs. 
malloc [100 x struct],1 -> malloc struct, 100 - if (const ArrayType *AT = dyn_cast<ArrayType>(MI->getAllocatedType())) { - MallocInst *NewMI = - new MallocInst(AllocSTy, - ConstantInt::get(Type::getInt32Ty(Context), - AT->getNumElements()), - "", MI); - NewMI->takeName(MI); - Value *Cast = new BitCastInst(NewMI, MI->getType(), "tmp", MI); - MI->replaceAllUsesWith(Cast); - MI->eraseFromParent(); - MI = NewMI; - } - - GVI = PerformHeapAllocSRoA(GV, MI, Context); - return true; - } - } - - return false; -} - -/// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a -/// pointer global variable with a single value stored it that is a malloc or -/// cast of malloc. -static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI, BitCastInst *BCI, Module::global_iterator &GVI, @@ -1892,52 +1501,55 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // transform the program to use global memory instead of malloc'd memory. // This eliminates dynamic allocation, avoids an indirection accessing the // data, and exposes the resultant global to further GlobalOpt. - if (ConstantInt *NElements = - dyn_cast<ConstantInt>(getMallocArraySize(CI, Context, TD))) { - // Restrict this transformation to only working on small allocations - // (2048 bytes currently), as we don't want to introduce a 16M global or - // something. - if (TD && - NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) { - GVI = OptimizeGlobalAddressOfMalloc(GV, CI, BCI, Context, TD); - return true; - } - } - - // If the allocation is an array of structures, consider transforming this - // into multiple malloc'd arrays, one for each field. This is basically - // SRoA for malloc'd memory. - - // If this is an allocation of a fixed size array of structs, analyze as a - // variable size array. malloc [100 x struct],1 -> malloc struct, 100 - if (!isArrayMalloc(CI, Context, TD)) - if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy)) - AllocTy = AT->getElementType(); - - if (const StructType *AllocSTy = dyn_cast<StructType>(AllocTy)) { - // This the structure has an unreasonable number of fields, leave it - // alone. - if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 && - AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, BCI)) { - - // If this is a fixed size array, transform the Malloc to be an alloc of - // structs. malloc [100 x struct],1 -> malloc struct, 100 - if (const ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI))) { - Value* NumElements = ConstantInt::get(Type::getInt32Ty(Context), - AT->getNumElements()); - Value* NewMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context), - AllocSTy, NumElements, - BCI->getName()); - Value *Cast = new BitCastInst(NewMI, getMallocType(CI), "tmp", CI); - BCI->replaceAllUsesWith(Cast); - BCI->eraseFromParent(); - CI->eraseFromParent(); - BCI = cast<BitCastInst>(NewMI); - CI = extractMallocCallFromBitCast(NewMI); + Value *NElems = getMallocArraySize(CI, Context, TD); + // We cannot optimize the malloc if we cannot determine malloc array size. + if (NElems) { + if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems)) + // Restrict this transformation to only working on small allocations + // (2048 bytes currently), as we don't want to introduce a 16M global or + // something. 
+ if (TD && + NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) { + GVI = OptimizeGlobalAddressOfMalloc(GV, CI, BCI, Context, TD); + return true; } + + // If the allocation is an array of structures, consider transforming this + // into multiple malloc'd arrays, one for each field. This is basically + // SRoA for malloc'd memory. + + // If this is an allocation of a fixed size array of structs, analyze as a + // variable size array. malloc [100 x struct],1 -> malloc struct, 100 + if (!isArrayMalloc(CI, Context, TD)) + if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy)) + AllocTy = AT->getElementType(); + + if (const StructType *AllocSTy = dyn_cast<StructType>(AllocTy)) { + // This the structure has an unreasonable number of fields, leave it + // alone. + if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 && + AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, BCI)) { + + // If this is a fixed size array, transform the Malloc to be an alloc of + // structs. malloc [100 x struct],1 -> malloc struct, 100 + if (const ArrayType *AT = + dyn_cast<ArrayType>(getMallocAllocatedType(CI))) { + Value* NumElements = ConstantInt::get(Type::getInt32Ty(Context), + AT->getNumElements()); + Value* NewMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context), + AllocSTy, NumElements, + BCI->getName()); + Value *Cast = new BitCastInst(NewMI, getMallocType(CI), "tmp", CI); + BCI->replaceAllUsesWith(Cast); + BCI->eraseFromParent(); + CI->eraseFromParent(); + BCI = cast<BitCastInst>(NewMI); + CI = extractMallocCallFromBitCast(NewMI); + } - GVI = PerformHeapAllocSRoA(GV, CI, BCI, Context, TD); - return true; + GVI = PerformHeapAllocSRoA(GV, CI, BCI, Context, TD); + return true; + } } } @@ -1966,9 +1578,6 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, // Optimize away any trapping uses of the loaded value. if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, Context)) return true; - } else if (MallocInst *MI = dyn_cast<MallocInst>(StoredOnceVal)) { - if (TryToOptimizeStoreOfMallocToGlobal(GV, MI, GVI, TD, Context)) - return true; } else if (CallInst *CI = extractMallocCall(StoredOnceVal)) { if (getMallocAllocatedType(CI)) { BitCastInst* BCI = NULL; diff --git a/lib/Transforms/IPO/RaiseAllocations.cpp b/lib/Transforms/IPO/RaiseAllocations.cpp index 4c1f26d..deb4405 100644 --- a/lib/Transforms/IPO/RaiseAllocations.cpp +++ b/lib/Transforms/IPO/RaiseAllocations.cpp @@ -1,4 +1,4 @@ -//===- RaiseAllocations.cpp - Convert @malloc & @free calls to insts ------===// +//===- RaiseAllocations.cpp - Convert @free calls to insts ------===// // // The LLVM Compiler Infrastructure // @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// // -// This file defines the RaiseAllocations pass which convert malloc and free -// calls to malloc and free instructions. +// This file defines the RaiseAllocations pass which convert free calls to free +// instructions. // //===----------------------------------------------------------------------===// @@ -29,19 +29,19 @@ using namespace llvm; STATISTIC(NumRaised, "Number of allocations raised"); namespace { - // RaiseAllocations - Turn @malloc and @free calls into the appropriate + // RaiseAllocations - Turn @free calls into the appropriate // instruction. 
// class VISIBILITY_HIDDEN RaiseAllocations : public ModulePass { - Function *MallocFunc; // Functions in the module we are processing - Function *FreeFunc; // Initialized by doPassInitializationVirt + Function *FreeFunc; // Functions in the module we are processing + // Initialized by doPassInitializationVirt public: static char ID; // Pass identification, replacement for typeid RaiseAllocations() - : ModulePass(&ID), MallocFunc(0), FreeFunc(0) {} + : ModulePass(&ID), FreeFunc(0) {} // doPassInitialization - For the raise allocations pass, this finds a - // declaration for malloc and free if they exist. + // declaration for free if it exists. // void doInitialization(Module &M); @@ -61,50 +61,16 @@ ModulePass *llvm::createRaiseAllocationsPass() { } -// If the module has a symbol table, they might be referring to the malloc and -// free functions. If this is the case, grab the method pointers that the -// module is using. +// If the module has a symbol table, they might be referring to the free +// function. If this is the case, grab the method pointers that the module is +// using. // -// Lookup @malloc and @free in the symbol table, for later use. If they don't +// Lookup @free in the symbol table, for later use. If they don't // exist, or are not external, we do not worry about converting calls to that // function into the appropriate instruction. // void RaiseAllocations::doInitialization(Module &M) { - // Get Malloc and free prototypes if they exist! - MallocFunc = M.getFunction("malloc"); - if (MallocFunc) { - const FunctionType* TyWeHave = MallocFunc->getFunctionType(); - - // Get the expected prototype for malloc - const FunctionType *Malloc1Type = - FunctionType::get(Type::getInt8PtrTy(M.getContext()), - std::vector<const Type*>(1, - Type::getInt64Ty(M.getContext())), false); - - // Chck to see if we got the expected malloc - if (TyWeHave != Malloc1Type) { - // Check to see if the prototype is wrong, giving us i8*(i32) * malloc - // This handles the common declaration of: 'void *malloc(unsigned);' - const FunctionType *Malloc2Type = - FunctionType::get(PointerType::getUnqual( - Type::getInt8Ty(M.getContext())), - std::vector<const Type*>(1, - Type::getInt32Ty(M.getContext())), false); - if (TyWeHave != Malloc2Type) { - // Check to see if the prototype is missing, giving us - // i8*(...) * malloc - // This handles the common declaration of: 'void *malloc();' - const FunctionType *Malloc3Type = - FunctionType::get(PointerType::getUnqual( - Type::getInt8Ty(M.getContext())), - true); - if (TyWeHave != Malloc3Type) - // Give up - MallocFunc = 0; - } - } - } - + // Get free prototype if it exists! FreeFunc = M.getFunction("free"); if (FreeFunc) { const FunctionType* TyWeHave = FreeFunc->getFunctionType(); @@ -138,72 +104,18 @@ void RaiseAllocations::doInitialization(Module &M) { } // Don't mess with locally defined versions of these functions... - if (MallocFunc && !MallocFunc->isDeclaration()) MallocFunc = 0; if (FreeFunc && !FreeFunc->isDeclaration()) FreeFunc = 0; } // run - Transform calls into instructions... // bool RaiseAllocations::runOnModule(Module &M) { - // Find the malloc/free prototypes... + // Find the free prototype... doInitialization(M); bool Changed = false; - // First, process all of the malloc calls... 
- if (MallocFunc) { - std::vector<User*> Users(MallocFunc->use_begin(), MallocFunc->use_end()); - std::vector<Value*> EqPointers; // Values equal to MallocFunc - while (!Users.empty()) { - User *U = Users.back(); - Users.pop_back(); - - if (Instruction *I = dyn_cast<Instruction>(U)) { - CallSite CS = CallSite::get(I); - if (CS.getInstruction() && !CS.arg_empty() && - (CS.getCalledFunction() == MallocFunc || - std::find(EqPointers.begin(), EqPointers.end(), - CS.getCalledValue()) != EqPointers.end())) { - - Value *Source = *CS.arg_begin(); - - // If no prototype was provided for malloc, we may need to cast the - // source size. - if (Source->getType() != Type::getInt32Ty(M.getContext())) - Source = - CastInst::CreateIntegerCast(Source, - Type::getInt32Ty(M.getContext()), - false/*ZExt*/, - "MallocAmtCast", I); - - MallocInst *MI = new MallocInst(Type::getInt8Ty(M.getContext()), - Source, "", I); - MI->takeName(I); - I->replaceAllUsesWith(MI); - - // If the old instruction was an invoke, add an unconditional branch - // before the invoke, which will become the new terminator. - if (InvokeInst *II = dyn_cast<InvokeInst>(I)) - BranchInst::Create(II->getNormalDest(), I); - - // Delete the old call site - I->eraseFromParent(); - Changed = true; - ++NumRaised; - } - } else if (GlobalValue *GV = dyn_cast<GlobalValue>(U)) { - Users.insert(Users.end(), GV->use_begin(), GV->use_end()); - EqPointers.push_back(GV); - } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) { - if (CE->isCast()) { - Users.insert(Users.end(), CE->use_begin(), CE->use_end()); - EqPointers.push_back(CE); - } - } - } - } - - // Next, process all free calls... + // Process all free calls... if (FreeFunc) { std::vector<User*> Users(FreeFunc->use_begin(), FreeFunc->use_end()); std::vector<Value*> EqPointers; // Values equal to FreeFunc diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp index 77d44b2..57aaf43 100644 --- a/lib/Transforms/IPO/StripSymbols.cpp +++ b/lib/Transforms/IPO/StripSymbols.cpp @@ -250,8 +250,6 @@ static bool StripDebugInfo(Module &M) { if (NMD) NMD->eraseFromParent(); - // Remove dead metadata. - M.getContext().RemoveDeadMetadata(); return true; } diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index a3e3fea..42209b8 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -73,6 +73,7 @@ namespace { DenseMap<Value*,Value*> &SunkAddrs); bool OptimizeInlineAsmInst(Instruction *I, CallSite CS, DenseMap<Value*,Value*> &SunkAddrs); + bool MoveExtToFormExtLoad(Instruction *I); bool OptimizeExtUses(Instruction *I); void findLoopBackEdges(const Function &F); }; @@ -731,6 +732,43 @@ bool CodeGenPrepare::OptimizeInlineAsmInst(Instruction *I, CallSite CS, return MadeChange; } +/// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same +/// basic block as the load, unless conditions are unfavorable. This allows +/// SelectionDAG to fold the extend into the load. +/// +bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) { + // Look for a load being extended. + LoadInst *LI = dyn_cast<LoadInst>(I->getOperand(0)); + if (!LI) return false; + + // If they're already in the same block, there's nothing to do. + if (LI->getParent() == I->getParent()) + return false; + + // If the load has other users and the truncate is not free, this probably + // isn't worthwhile. 
+ if (!LI->hasOneUse() && + TLI && !TLI->isTruncateFree(I->getType(), LI->getType())) + return false; + + // Check whether the target supports casts folded into loads. + unsigned LType; + if (isa<ZExtInst>(I)) + LType = ISD::ZEXTLOAD; + else { + assert(isa<SExtInst>(I) && "Unexpected ext type!"); + LType = ISD::SEXTLOAD; + } + if (TLI && !TLI->isLoadExtLegal(LType, TLI->getValueType(LI->getType()))) + return false; + + // Move the extend into the same block as the load, so that SelectionDAG + // can fold it. + I->removeFromParent(); + I->insertAfter(LI); + return true; +} + bool CodeGenPrepare::OptimizeExtUses(Instruction *I) { BasicBlock *DefBB = I->getParent(); @@ -846,8 +884,10 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) { MadeChange |= Change; } - if (!Change && (isa<ZExtInst>(I) || isa<SExtInst>(I))) + if (!Change && (isa<ZExtInst>(I) || isa<SExtInst>(I))) { + MadeChange |= MoveExtToFormExtLoad(I); MadeChange |= OptimizeExtUses(I); + } } else if (CmpInst *CI = dyn_cast<CmpInst>(I)) { MadeChange |= OptimizeCmpExpression(CI); } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) { diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 2ed4a63..8859324 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -78,13 +78,10 @@ namespace { SHUFFLE, SELECT, TRUNC, ZEXT, SEXT, FPTOUI, FPTOSI, UITOFP, SITOFP, FPTRUNC, FPEXT, PTRTOINT, INTTOPTR, BITCAST, GEP, CALL, CONSTANT, - EMPTY, TOMBSTONE }; + INSERTVALUE, EXTRACTVALUE, EMPTY, TOMBSTONE }; ExpressionOpcode opcode; const Type* type; - uint32_t firstVN; - uint32_t secondVN; - uint32_t thirdVN; SmallVector<uint32_t, 4> varargs; Value *function; @@ -100,12 +97,6 @@ namespace { return false; else if (function != other.function) return false; - else if (firstVN != other.firstVN) - return false; - else if (secondVN != other.secondVN) - return false; - else if (thirdVN != other.thirdVN) - return false; else { if (varargs.size() != other.varargs.size()) return false; @@ -146,6 +137,10 @@ namespace { Expression create_expression(GetElementPtrInst* G); Expression create_expression(CallInst* C); Expression create_expression(Constant* C); + Expression create_expression(ExtractValueInst* C); + Expression create_expression(InsertValueInst* C); + + uint32_t lookup_or_add_call(CallInst* C); public: ValueTable() : nextValueNumber(1) { } uint32_t lookup_or_add(Value *V); @@ -176,13 +171,8 @@ template <> struct DenseMapInfo<Expression> { static unsigned getHashValue(const Expression e) { unsigned hash = e.opcode; - hash = e.firstVN + hash * 37; - hash = e.secondVN + hash * 37; - hash = e.thirdVN + hash * 37; - hash = ((unsigned)((uintptr_t)e.type >> 4) ^ - (unsigned)((uintptr_t)e.type >> 9)) + - hash * 37; + (unsigned)((uintptr_t)e.type >> 9)); for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(), E = e.varargs.end(); I != E; ++I) @@ -290,9 +280,6 @@ Expression ValueTable::create_expression(CallInst* C) { Expression e; e.type = C->getType(); - e.firstVN = 0; - e.secondVN = 0; - e.thirdVN = 0; e.function = C->getCalledFunction(); e.opcode = Expression::CALL; @@ -305,10 +292,8 @@ Expression ValueTable::create_expression(CallInst* C) { Expression ValueTable::create_expression(BinaryOperator* BO) { Expression e; - - e.firstVN = lookup_or_add(BO->getOperand(0)); - e.secondVN = lookup_or_add(BO->getOperand(1)); - e.thirdVN = 0; + e.varargs.push_back(lookup_or_add(BO->getOperand(0))); + e.varargs.push_back(lookup_or_add(BO->getOperand(1))); e.function = 0; e.type = BO->getType(); e.opcode = 
getOpcode(BO); @@ -319,9 +304,8 @@ Expression ValueTable::create_expression(BinaryOperator* BO) { Expression ValueTable::create_expression(CmpInst* C) { Expression e; - e.firstVN = lookup_or_add(C->getOperand(0)); - e.secondVN = lookup_or_add(C->getOperand(1)); - e.thirdVN = 0; + e.varargs.push_back(lookup_or_add(C->getOperand(0))); + e.varargs.push_back(lookup_or_add(C->getOperand(1))); e.function = 0; e.type = C->getType(); e.opcode = getOpcode(C); @@ -332,9 +316,7 @@ Expression ValueTable::create_expression(CmpInst* C) { Expression ValueTable::create_expression(CastInst* C) { Expression e; - e.firstVN = lookup_or_add(C->getOperand(0)); - e.secondVN = 0; - e.thirdVN = 0; + e.varargs.push_back(lookup_or_add(C->getOperand(0))); e.function = 0; e.type = C->getType(); e.opcode = getOpcode(C); @@ -345,9 +327,9 @@ Expression ValueTable::create_expression(CastInst* C) { Expression ValueTable::create_expression(ShuffleVectorInst* S) { Expression e; - e.firstVN = lookup_or_add(S->getOperand(0)); - e.secondVN = lookup_or_add(S->getOperand(1)); - e.thirdVN = lookup_or_add(S->getOperand(2)); + e.varargs.push_back(lookup_or_add(S->getOperand(0))); + e.varargs.push_back(lookup_or_add(S->getOperand(1))); + e.varargs.push_back(lookup_or_add(S->getOperand(2))); e.function = 0; e.type = S->getType(); e.opcode = Expression::SHUFFLE; @@ -358,9 +340,8 @@ Expression ValueTable::create_expression(ShuffleVectorInst* S) { Expression ValueTable::create_expression(ExtractElementInst* E) { Expression e; - e.firstVN = lookup_or_add(E->getOperand(0)); - e.secondVN = lookup_or_add(E->getOperand(1)); - e.thirdVN = 0; + e.varargs.push_back(lookup_or_add(E->getOperand(0))); + e.varargs.push_back(lookup_or_add(E->getOperand(1))); e.function = 0; e.type = E->getType(); e.opcode = Expression::EXTRACT; @@ -371,9 +352,9 @@ Expression ValueTable::create_expression(ExtractElementInst* E) { Expression ValueTable::create_expression(InsertElementInst* I) { Expression e; - e.firstVN = lookup_or_add(I->getOperand(0)); - e.secondVN = lookup_or_add(I->getOperand(1)); - e.thirdVN = lookup_or_add(I->getOperand(2)); + e.varargs.push_back(lookup_or_add(I->getOperand(0))); + e.varargs.push_back(lookup_or_add(I->getOperand(1))); + e.varargs.push_back(lookup_or_add(I->getOperand(2))); e.function = 0; e.type = I->getType(); e.opcode = Expression::INSERT; @@ -384,9 +365,9 @@ Expression ValueTable::create_expression(InsertElementInst* I) { Expression ValueTable::create_expression(SelectInst* I) { Expression e; - e.firstVN = lookup_or_add(I->getCondition()); - e.secondVN = lookup_or_add(I->getTrueValue()); - e.thirdVN = lookup_or_add(I->getFalseValue()); + e.varargs.push_back(lookup_or_add(I->getCondition())); + e.varargs.push_back(lookup_or_add(I->getTrueValue())); + e.varargs.push_back(lookup_or_add(I->getFalseValue())); e.function = 0; e.type = I->getType(); e.opcode = Expression::SELECT; @@ -397,9 +378,7 @@ Expression ValueTable::create_expression(SelectInst* I) { Expression ValueTable::create_expression(GetElementPtrInst* G) { Expression e; - e.firstVN = lookup_or_add(G->getPointerOperand()); - e.secondVN = 0; - e.thirdVN = 0; + e.varargs.push_back(lookup_or_add(G->getPointerOperand())); e.function = 0; e.type = G->getType(); e.opcode = Expression::GEP; @@ -411,6 +390,35 @@ Expression ValueTable::create_expression(GetElementPtrInst* G) { return e; } +Expression ValueTable::create_expression(ExtractValueInst* E) { + Expression e; + + e.varargs.push_back(lookup_or_add(E->getAggregateOperand())); + for (ExtractValueInst::idx_iterator II = 
E->idx_begin(), IE = E->idx_end(); + II != IE; ++II) + e.varargs.push_back(*II); + e.function = 0; + e.type = E->getType(); + e.opcode = Expression::EXTRACTVALUE; + + return e; +} + +Expression ValueTable::create_expression(InsertValueInst* E) { + Expression e; + + e.varargs.push_back(lookup_or_add(E->getAggregateOperand())); + e.varargs.push_back(lookup_or_add(E->getInsertedValueOperand())); + for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end(); + II != IE; ++II) + e.varargs.push_back(*II); + e.function = 0; + e.type = E->getType(); + e.opcode = Expression::INSERTVALUE; + + return e; +} + //===----------------------------------------------------------------------===// // ValueTable External Functions //===----------------------------------------------------------------------===// @@ -420,232 +428,197 @@ void ValueTable::add(Value *V, uint32_t num) { valueNumbering.insert(std::make_pair(V, num)); } -/// lookup_or_add - Returns the value number for the specified value, assigning -/// it a new number if it did not have one before. -uint32_t ValueTable::lookup_or_add(Value *V) { - DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V); - if (VI != valueNumbering.end()) - return VI->second; - - if (CallInst* C = dyn_cast<CallInst>(V)) { - if (AA->doesNotAccessMemory(C)) { - Expression e = create_expression(C); - - DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); - if (EI != expressionNumbering.end()) { - valueNumbering.insert(std::make_pair(V, EI->second)); - return EI->second; - } else { - expressionNumbering.insert(std::make_pair(e, nextValueNumber)); - valueNumbering.insert(std::make_pair(V, nextValueNumber)); - - return nextValueNumber++; - } - } else if (AA->onlyReadsMemory(C)) { - Expression e = create_expression(C); - - if (expressionNumbering.find(e) == expressionNumbering.end()) { - expressionNumbering.insert(std::make_pair(e, nextValueNumber)); - valueNumbering.insert(std::make_pair(V, nextValueNumber)); - return nextValueNumber++; - } - - MemDepResult local_dep = MD->getDependency(C); - - if (!local_dep.isDef() && !local_dep.isNonLocal()) { - valueNumbering.insert(std::make_pair(V, nextValueNumber)); - return nextValueNumber++; - } - - if (local_dep.isDef()) { - CallInst* local_cdep = cast<CallInst>(local_dep.getInst()); - - if (local_cdep->getNumOperands() != C->getNumOperands()) { - valueNumbering.insert(std::make_pair(V, nextValueNumber)); - return nextValueNumber++; - } - - for (unsigned i = 1; i < C->getNumOperands(); ++i) { - uint32_t c_vn = lookup_or_add(C->getOperand(i)); - uint32_t cd_vn = lookup_or_add(local_cdep->getOperand(i)); - if (c_vn != cd_vn) { - valueNumbering.insert(std::make_pair(V, nextValueNumber)); - return nextValueNumber++; - } - } - - uint32_t v = lookup_or_add(local_cdep); - valueNumbering.insert(std::make_pair(V, v)); - return v; - } - - // Non-local case. - const MemoryDependenceAnalysis::NonLocalDepInfo &deps = - MD->getNonLocalCallDependency(CallSite(C)); - // FIXME: call/call dependencies for readonly calls should return def, not - // clobber! Move the checking logic to MemDep! - CallInst* cdep = 0; - - // Check to see if we have a single dominating call instruction that is - // identical to C. - for (unsigned i = 0, e = deps.size(); i != e; ++i) { - const MemoryDependenceAnalysis::NonLocalDepEntry *I = &deps[i]; - // Ignore non-local dependencies. 
- if (I->second.isNonLocal()) - continue; +uint32_t ValueTable::lookup_or_add_call(CallInst* C) { + if (AA->doesNotAccessMemory(C)) { + Expression exp = create_expression(C); + uint32_t& e = expressionNumbering[exp]; + if (!e) e = nextValueNumber++; + valueNumbering[C] = e; + return e; + } else if (AA->onlyReadsMemory(C)) { + Expression exp = create_expression(C); + uint32_t& e = expressionNumbering[exp]; + if (!e) { + e = nextValueNumber++; + valueNumbering[C] = e; + return e; + } - // We don't handle non-depedencies. If we already have a call, reject - // instruction dependencies. - if (I->second.isClobber() || cdep != 0) { - cdep = 0; - break; - } + MemDepResult local_dep = MD->getDependency(C); - CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->second.getInst()); - // FIXME: All duplicated with non-local case. - if (NonLocalDepCall && DT->properlyDominates(I->first, C->getParent())){ - cdep = NonLocalDepCall; - continue; - } + if (!local_dep.isDef() && !local_dep.isNonLocal()) { + valueNumbering[C] = nextValueNumber; + return nextValueNumber++; + } - cdep = 0; - break; - } + if (local_dep.isDef()) { + CallInst* local_cdep = cast<CallInst>(local_dep.getInst()); - if (!cdep) { - valueNumbering.insert(std::make_pair(V, nextValueNumber)); + if (local_cdep->getNumOperands() != C->getNumOperands()) { + valueNumbering[C] = nextValueNumber; return nextValueNumber++; } - if (cdep->getNumOperands() != C->getNumOperands()) { - valueNumbering.insert(std::make_pair(V, nextValueNumber)); - return nextValueNumber++; - } for (unsigned i = 1; i < C->getNumOperands(); ++i) { uint32_t c_vn = lookup_or_add(C->getOperand(i)); - uint32_t cd_vn = lookup_or_add(cdep->getOperand(i)); + uint32_t cd_vn = lookup_or_add(local_cdep->getOperand(i)); if (c_vn != cd_vn) { - valueNumbering.insert(std::make_pair(V, nextValueNumber)); + valueNumbering[C] = nextValueNumber; return nextValueNumber++; } } - uint32_t v = lookup_or_add(cdep); - valueNumbering.insert(std::make_pair(V, v)); + uint32_t v = lookup_or_add(local_cdep); + valueNumbering[C] = v; return v; - - } else { - valueNumbering.insert(std::make_pair(V, nextValueNumber)); - return nextValueNumber++; } - } else if (BinaryOperator* BO = dyn_cast<BinaryOperator>(V)) { - Expression e = create_expression(BO); - DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); - if (EI != expressionNumbering.end()) { - valueNumbering.insert(std::make_pair(V, EI->second)); - return EI->second; - } else { - expressionNumbering.insert(std::make_pair(e, nextValueNumber)); - valueNumbering.insert(std::make_pair(V, nextValueNumber)); + // Non-local case. + const MemoryDependenceAnalysis::NonLocalDepInfo &deps = + MD->getNonLocalCallDependency(CallSite(C)); + // FIXME: call/call dependencies for readonly calls should return def, not + // clobber! Move the checking logic to MemDep! + CallInst* cdep = 0; + + // Check to see if we have a single dominating call instruction that is + // identical to C. + for (unsigned i = 0, e = deps.size(); i != e; ++i) { + const MemoryDependenceAnalysis::NonLocalDepEntry *I = &deps[i]; + // Ignore non-local dependencies. + if (I->second.isNonLocal()) + continue; - return nextValueNumber++; - } - } else if (CmpInst* C = dyn_cast<CmpInst>(V)) { - Expression e = create_expression(C); + // We don't handle non-depedencies. If we already have a call, reject + // instruction dependencies. 
+ if (I->second.isClobber() || cdep != 0) { + cdep = 0; + break; + } - DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); - if (EI != expressionNumbering.end()) { - valueNumbering.insert(std::make_pair(V, EI->second)); - return EI->second; - } else { - expressionNumbering.insert(std::make_pair(e, nextValueNumber)); - valueNumbering.insert(std::make_pair(V, nextValueNumber)); + CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->second.getInst()); + // FIXME: All duplicated with non-local case. + if (NonLocalDepCall && DT->properlyDominates(I->first, C->getParent())){ + cdep = NonLocalDepCall; + continue; + } - return nextValueNumber++; + cdep = 0; + break; } - } else if (ShuffleVectorInst* U = dyn_cast<ShuffleVectorInst>(V)) { - Expression e = create_expression(U); - - DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); - if (EI != expressionNumbering.end()) { - valueNumbering.insert(std::make_pair(V, EI->second)); - return EI->second; - } else { - expressionNumbering.insert(std::make_pair(e, nextValueNumber)); - valueNumbering.insert(std::make_pair(V, nextValueNumber)); + if (!cdep) { + valueNumbering[C] = nextValueNumber; return nextValueNumber++; } - } else if (ExtractElementInst* U = dyn_cast<ExtractElementInst>(V)) { - Expression e = create_expression(U); - - DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); - if (EI != expressionNumbering.end()) { - valueNumbering.insert(std::make_pair(V, EI->second)); - return EI->second; - } else { - expressionNumbering.insert(std::make_pair(e, nextValueNumber)); - valueNumbering.insert(std::make_pair(V, nextValueNumber)); + if (cdep->getNumOperands() != C->getNumOperands()) { + valueNumbering[C] = nextValueNumber; return nextValueNumber++; } - } else if (InsertElementInst* U = dyn_cast<InsertElementInst>(V)) { - Expression e = create_expression(U); - - DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); - if (EI != expressionNumbering.end()) { - valueNumbering.insert(std::make_pair(V, EI->second)); - return EI->second; - } else { - expressionNumbering.insert(std::make_pair(e, nextValueNumber)); - valueNumbering.insert(std::make_pair(V, nextValueNumber)); - - return nextValueNumber++; + for (unsigned i = 1; i < C->getNumOperands(); ++i) { + uint32_t c_vn = lookup_or_add(C->getOperand(i)); + uint32_t cd_vn = lookup_or_add(cdep->getOperand(i)); + if (c_vn != cd_vn) { + valueNumbering[C] = nextValueNumber; + return nextValueNumber++; + } } - } else if (SelectInst* U = dyn_cast<SelectInst>(V)) { - Expression e = create_expression(U); - DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); - if (EI != expressionNumbering.end()) { - valueNumbering.insert(std::make_pair(V, EI->second)); - return EI->second; - } else { - expressionNumbering.insert(std::make_pair(e, nextValueNumber)); - valueNumbering.insert(std::make_pair(V, nextValueNumber)); + uint32_t v = lookup_or_add(cdep); + valueNumbering[C] = v; + return v; - return nextValueNumber++; - } - } else if (CastInst* U = dyn_cast<CastInst>(V)) { - Expression e = create_expression(U); - - DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); - if (EI != expressionNumbering.end()) { - valueNumbering.insert(std::make_pair(V, EI->second)); - return EI->second; - } else { - expressionNumbering.insert(std::make_pair(e, nextValueNumber)); - valueNumbering.insert(std::make_pair(V, nextValueNumber)); - - return nextValueNumber++; - } - } else if (GetElementPtrInst* U = 
dyn_cast<GetElementPtrInst>(V)) { - Expression e = create_expression(U); + } else { + valueNumbering[C] = nextValueNumber; + return nextValueNumber++; + } +} - DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); - if (EI != expressionNumbering.end()) { - valueNumbering.insert(std::make_pair(V, EI->second)); - return EI->second; - } else { - expressionNumbering.insert(std::make_pair(e, nextValueNumber)); - valueNumbering.insert(std::make_pair(V, nextValueNumber)); +/// lookup_or_add - Returns the value number for the specified value, assigning +/// it a new number if it did not have one before. +uint32_t ValueTable::lookup_or_add(Value *V) { + DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V); + if (VI != valueNumbering.end()) + return VI->second; - return nextValueNumber++; - } - } else { - valueNumbering.insert(std::make_pair(V, nextValueNumber)); + if (!isa<Instruction>(V)) { + valueNumbering[V] = nextValueNumber; return nextValueNumber++; } + + Instruction* I = cast<Instruction>(V); + Expression exp; + switch (I->getOpcode()) { + case Instruction::Call: + return lookup_or_add_call(cast<CallInst>(I)); + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or : + case Instruction::Xor: + exp = create_expression(cast<BinaryOperator>(I)); + break; + case Instruction::ICmp: + case Instruction::FCmp: + exp = create_expression(cast<CmpInst>(I)); + break; + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::BitCast: + exp = create_expression(cast<CastInst>(I)); + break; + case Instruction::Select: + exp = create_expression(cast<SelectInst>(I)); + break; + case Instruction::ExtractElement: + exp = create_expression(cast<ExtractElementInst>(I)); + break; + case Instruction::InsertElement: + exp = create_expression(cast<InsertElementInst>(I)); + break; + case Instruction::ShuffleVector: + exp = create_expression(cast<ShuffleVectorInst>(I)); + break; + case Instruction::ExtractValue: + exp = create_expression(cast<ExtractValueInst>(I)); + break; + case Instruction::InsertValue: + exp = create_expression(cast<InsertValueInst>(I)); + break; + case Instruction::GetElementPtr: + exp = create_expression(cast<GetElementPtrInst>(I)); + break; + default: + valueNumbering[V] = nextValueNumber; + return nextValueNumber++; + } + + uint32_t& e = expressionNumbering[exp]; + if (!e) e = nextValueNumber++; + valueNumbering[V] = e; + return e; } /// lookup - Returns the value number of the specified value. Fails if @@ -1557,15 +1530,18 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { // actually have the same type. See if we know how to reuse the stored // value (depending on its type). 
const TargetData *TD = 0; - if (StoredVal->getType() != L->getType() && - (TD = getAnalysisIfAvailable<TargetData>())) { - StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(), - L, *TD); - if (StoredVal == 0) + if (StoredVal->getType() != L->getType()) { + if ((TD = getAnalysisIfAvailable<TargetData>())) { + StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(), + L, *TD); + if (StoredVal == 0) + return false; + + DEBUG(errs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal + << '\n' << *L << "\n\n\n"); + } + else return false; - - DEBUG(errs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal - << '\n' << *L << "\n\n\n"); } // Remove it! @@ -1584,14 +1560,17 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { // the same type. See if we know how to reuse the previously loaded value // (depending on its type). const TargetData *TD = 0; - if (DepLI->getType() != L->getType() && - (TD = getAnalysisIfAvailable<TargetData>())) { - AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L,*TD); - if (AvailableVal == 0) - return false; + if (DepLI->getType() != L->getType()) { + if ((TD = getAnalysisIfAvailable<TargetData>())) { + AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L,*TD); + if (AvailableVal == 0) + return false; - DEBUG(errs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal - << "\n" << *L << "\n\n\n"); + DEBUG(errs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal + << "\n" << *L << "\n\n\n"); + } + else + return false; } // Remove it! diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp index f635af3..b41b5d4 100644 --- a/lib/Transforms/Scalar/InstructionCombining.cpp +++ b/lib/Transforms/Scalar/InstructionCombining.cpp @@ -6300,25 +6300,33 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { return SelectInst::Create(LHSI->getOperand(0), Op1, Op2); break; } - case Instruction::Malloc: - // If we have (malloc != null), and if the malloc has a single use, we - // can assume it is successful and remove the malloc. - if (LHSI->hasOneUse() && isa<ConstantPointerNull>(RHSC)) { - Worklist.Add(LHSI); - return ReplaceInstUsesWith(I, - ConstantInt::get(Type::getInt1Ty(*Context), - !I.isTrueWhenEqual())); - } - break; case Instruction::Call: // If we have (malloc != null), and if the malloc has a single use, we // can assume it is successful and remove the malloc. if (isMalloc(LHSI) && LHSI->hasOneUse() && isa<ConstantPointerNull>(RHSC)) { - Worklist.Add(LHSI); - return ReplaceInstUsesWith(I, + // Need to explicitly erase malloc call here, instead of adding it to + // Worklist, because it won't get DCE'd from the Worklist since + // isInstructionTriviallyDead() returns false for function calls. + // It is OK to replace LHSI/MallocCall with Undef because the + // instruction that uses it will be erased via Worklist. + if (extractMallocCall(LHSI)) { + LHSI->replaceAllUsesWith(UndefValue::get(LHSI->getType())); + EraseInstFromFunction(*LHSI); + return ReplaceInstUsesWith(I, ConstantInt::get(Type::getInt1Ty(*Context), !I.isTrueWhenEqual())); + } + if (CallInst* MallocCall = extractMallocCallFromBitCast(LHSI)) + if (MallocCall->hasOneUse()) { + MallocCall->replaceAllUsesWith( + UndefValue::get(MallocCall->getType())); + EraseInstFromFunction(*MallocCall); + Worklist.Add(LHSI); // The malloc's bitcast use. 
+            return ReplaceInstUsesWith(I,
+                                 ConstantInt::get(Type::getInt1Ty(*Context),
+                                                  !I.isTrueWhenEqual()));
+          }
       }
       break;
     }
@@ -7809,11 +7817,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
     Amt = AllocaBuilder.CreateAdd(Amt, Off, "tmp");
   }
 
-  AllocationInst *New;
-  if (isa<MallocInst>(AI))
-    New = AllocaBuilder.CreateMalloc(CastElTy, Amt);
-  else
-    New = AllocaBuilder.CreateAlloca(CastElTy, Amt);
+  AllocationInst *New = AllocaBuilder.CreateAlloca(CastElTy, Amt);
   New->setAlignment(AI.getAlignment());
   New->takeName(&AI);
@@ -9270,14 +9274,44 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
   return Changed ? &SI : 0;
 }
 
-/// isDefinedInBB - Return true if the value is an instruction defined in the
-/// specified basicblock.
-static bool isDefinedInBB(const Value *V, const BasicBlock *BB) {
+
+/// CanSelectOperandBeMappingIntoPredBlock - SI is a select whose condition is
+/// a PHI node (but the two may be in different blocks).  See if the true/false
+/// values (V) are live in all of the predecessor blocks of the PHI.  For
+/// example, cases like this cannot be mapped:
+///
+///   X = phi [ C1, BB1], [C2, BB2]
+///   Y = add
+///   Z = select X, Y, 0
+///
+/// because Y is not live in BB1/BB2.
+///
+static bool CanSelectOperandBeMappingIntoPredBlock(const Value *V,
+                                                   const SelectInst &SI) {
+  // If the value is a non-instruction value like a constant or argument, it
+  // can always be mapped.
   const Instruction *I = dyn_cast<Instruction>(V);
-  return I != 0 && I->getParent() == BB;
+  if (I == 0) return true;
+
+  // If V is a PHI node defined in the same block as the condition PHI, we can
+  // map the arguments.
+  const PHINode *CondPHI = cast<PHINode>(SI.getCondition());
+
+  if (const PHINode *VP = dyn_cast<PHINode>(I))
+    if (VP->getParent() == CondPHI->getParent())
+      return true;
+
+  // Otherwise, if the PHI and select are defined in the same block and if V is
+  // defined in a different block, then we can transform it.
+  if (SI.getParent() == CondPHI->getParent() &&
+      I->getParent() != CondPHI->getParent())
+    return true;
+
+  // Otherwise we have a 'hard' case and we can't tell without doing more
+  // detailed dominator based analysis, punt.
+  return false;
 }
-
 Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
   Value *CondVal = SI.getCondition();
   Value *TrueVal = SI.getTrueValue();
@@ -9489,16 +9523,13 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
       return FoldI;
   }
 
-  // See if we can fold the select into a phi node.  The true/false values have
-  // to be live in the predecessor blocks.  If they are instructions in SI's
-  // block, we can't map to the predecessor.
-  if (isa<PHINode>(SI.getCondition()) &&
-      (!isDefinedInBB(SI.getTrueValue(), SI.getParent()) ||
-       isa<PHINode>(SI.getTrueValue())) &&
-      (!isDefinedInBB(SI.getFalseValue(), SI.getParent()) ||
-       isa<PHINode>(SI.getFalseValue())))
-    if (Instruction *NV = FoldOpIntoPhi(SI))
-      return NV;
+  // See if we can fold the select into a phi node if the condition is a phi node.
+  if (isa<PHINode>(SI.getCondition()))
+    // The true/false values have to be live in the PHI predecessors' blocks.
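The predicate just introduced encodes three legal cases; restated in isolation with placeholder types (Block/Instr are hypothetical stand-ins, not the LLVM classes):

struct Block { };
struct Instr { const Block *Parent; bool IsPHI; };

bool canMapIntoPredBlock(const Instr *V,          // null: constant or argument
                         const Block *CondPHIBB,  // block of the condition PHI
                         const Block *SelectBB) { // block of the select
  if (V == 0) return true;                        // live everywhere
  if (V->IsPHI && V->Parent == CondPHIBB)
    return true;                                  // PHI alongside the condition
  if (SelectBB == CondPHIBB && V->Parent != CondPHIBB)
    return true;                                  // defined outside the block
  return false;   // the 'hard' case: would need dominator analysis
}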
+ if (CanSelectOperandBeMappingIntoPredBlock(TrueVal, SI) && + CanSelectOperandBeMappingIntoPredBlock(FalseVal, SI)) + if (Instruction *NV = FoldOpIntoPhi(SI)) + return NV; if (BinaryOperator::isNot(CondVal)) { SI.setOperand(0, BinaryOperator::getNotArgument(CondVal)); @@ -11213,15 +11244,8 @@ Instruction *InstCombiner::visitAllocationInst(AllocationInst &AI) { if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) { const Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue()); - AllocationInst *New = 0; - - // Create and insert the replacement instruction... - if (isa<MallocInst>(AI)) - New = Builder->CreateMalloc(NewTy, 0, AI.getName()); - else { - assert(isa<AllocaInst>(AI) && "Unknown type of allocation inst!"); - New = Builder->CreateAlloca(NewTy, 0, AI.getName()); - } + assert(isa<AllocaInst>(AI) && "Unknown type of allocation inst!"); + AllocationInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName()); New->setAlignment(AI.getAlignment()); // Scan to the end of the allocation instructions, to skip over a block of @@ -11294,12 +11318,6 @@ Instruction *InstCombiner::visitFreeInst(FreeInst &FI) { } } - // Change free(malloc) into nothing, if the malloc has a single use. - if (MallocInst *MI = dyn_cast<MallocInst>(Op)) - if (MI->hasOneUse()) { - EraseInstFromFunction(FI); - return EraseInstFromFunction(*MI); - } if (isMalloc(Op)) { if (CallInst* CI = extractMallocCallFromBitCast(Op)) { if (Op->hasOneUse() && CI->hasOneUse()) { @@ -11327,40 +11345,6 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, Value *CastOp = CI->getOperand(0); LLVMContext *Context = IC.getContext(); - if (TD) { - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(CI)) { - // Instead of loading constant c string, use corresponding integer value - // directly if string length is small enough. - std::string Str; - if (GetConstantStringInfo(CE->getOperand(0), Str) && !Str.empty()) { - unsigned len = Str.length(); - const Type *Ty = cast<PointerType>(CE->getType())->getElementType(); - unsigned numBits = Ty->getPrimitiveSizeInBits(); - // Replace LI with immediate integer store. - if ((numBits >> 3) == len + 1) { - APInt StrVal(numBits, 0); - APInt SingleChar(numBits, 0); - if (TD->isLittleEndian()) { - for (signed i = len-1; i >= 0; i--) { - SingleChar = (uint64_t) Str[i] & UCHAR_MAX; - StrVal = (StrVal << 8) | SingleChar; - } - } else { - for (unsigned i = 0; i < len; i++) { - SingleChar = (uint64_t) Str[i] & UCHAR_MAX; - StrVal = (StrVal << 8) | SingleChar; - } - // Append NULL at the end. 
- SingleChar = 0; - StrVal = (StrVal << 8) | SingleChar; - } - Value *NL = ConstantInt::get(*Context, StrVal); - return IC.ReplaceInstUsesWith(LI, NL); - } - } - } - } - const PointerType *DestTy = cast<PointerType>(CI->getType()); const Type *DestPTy = DestTy->getElementType(); if (const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) { @@ -11380,7 +11364,8 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, if (Constant *CSrc = dyn_cast<Constant>(CastOp)) if (ASrcTy->getNumElements() != 0) { Value *Idxs[2]; - Idxs[0] = Idxs[1] = Constant::getNullValue(Type::getInt32Ty(*Context)); + Idxs[0] = Constant::getNullValue(Type::getInt32Ty(*Context)); + Idxs[1] = Idxs[0]; CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs, 2); SrcTy = cast<PointerType>(CastOp->getType()); SrcPTy = SrcTy->getElementType(); @@ -11436,6 +11421,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,6)) return ReplaceInstUsesWith(LI, AvailableVal); + // load(gep null, ...) -> unreachable if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) { const Value *GEPI0 = GEPI->getOperand(0); // TODO: Consider a target hook for valid address spaces for this xform. @@ -11450,60 +11436,24 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { } } - if (Constant *C = dyn_cast<Constant>(Op)) { - // load null/undef -> undef - // TODO: Consider a target hook for valid address spaces for this xform. - if (isa<UndefValue>(C) || - (C->isNullValue() && LI.getPointerAddressSpace() == 0)) { - // Insert a new store to null instruction before the load to indicate that - // this code is not reachable. We do this instead of inserting an - // unreachable instruction directly because we cannot modify the CFG. - new StoreInst(UndefValue::get(LI.getType()), - Constant::getNullValue(Op->getType()), &LI); - return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); - } - - // Instcombine load (constant global) into the value loaded. - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op)) - if (GV->isConstant() && GV->hasDefinitiveInitializer()) - return ReplaceInstUsesWith(LI, GV->getInitializer()); - - // Instcombine load (constantexpr_GEP global, 0, ...) into the value loaded. - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op)) { - if (CE->getOpcode() == Instruction::GetElementPtr) { - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getOperand(0))) - if (GV->isConstant() && GV->hasDefinitiveInitializer()) - if (Constant *V = - ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE)) - return ReplaceInstUsesWith(LI, V); - if (CE->getOperand(0)->isNullValue()) { - // Insert a new store to null instruction before the load to indicate - // that this code is not reachable. We do this instead of inserting - // an unreachable instruction directly because we cannot modify the - // CFG. - new StoreInst(UndefValue::get(LI.getType()), - Constant::getNullValue(Op->getType()), &LI); - return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); - } - - } else if (CE->isCast()) { - if (Instruction *Res = InstCombineLoadCast(*this, LI, TD)) - return Res; - } - } - } - - // If this load comes from anywhere in a constant global, and if the global - // is all undef or zero, we know what it loads. 
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op->getUnderlyingObject())){ - if (GV->isConstant() && GV->hasDefinitiveInitializer()) { - if (GV->getInitializer()->isNullValue()) - return ReplaceInstUsesWith(LI, Constant::getNullValue(LI.getType())); - else if (isa<UndefValue>(GV->getInitializer())) - return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); - } + // load null/undef -> unreachable + // TODO: Consider a target hook for valid address spaces for this xform. + if (isa<UndefValue>(Op) || + (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) { + // Insert a new store to null instruction before the load to indicate that + // this code is not reachable. We do this instead of inserting an + // unreachable instruction directly because we cannot modify the CFG. + new StoreInst(UndefValue::get(LI.getType()), + Constant::getNullValue(Op->getType()), &LI); + return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); } + // Instcombine load (constantexpr_cast global) -> cast (load global) + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op)) + if (CE->isCast()) + if (Instruction *Res = InstCombineLoadCast(*this, LI, TD)) + return Res; + if (Op->hasOneUse()) { // Change select and PHI nodes to select values instead of addresses: this // helps alias analysis out a lot, allows many others simplifications, and diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index f6de362..223d2b9 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -138,7 +138,6 @@ namespace { void SplitExitEdges(Loop *L, const SmallVector<BasicBlock *, 8> &ExitBlocks); bool UnswitchIfProfitable(Value *LoopCond, Constant *Val); - unsigned getLoopUnswitchCost(Value *LIC); void UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val, BasicBlock *ExitBlock); void UnswitchNontrivialCondition(Value *LIC, Constant *OnVal, Loop *L); @@ -400,25 +399,6 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val, return true; } -/// getLoopUnswitchCost - Return the cost (code size growth) that will happen if -/// we choose to unswitch current loop on the specified value. -/// -unsigned LoopUnswitch::getLoopUnswitchCost(Value *LIC) { - // If the condition is trivial, always unswitch. There is no code growth for - // this case. - if (IsTrivialUnswitchCondition(LIC)) - return 0; - - // FIXME: This is overly conservative because it does not take into - // consideration code simplification opportunities. - CodeMetrics Metrics; - for (Loop::block_iterator I = currentLoop->block_begin(), - E = currentLoop->block_end(); - I != E; ++I) - Metrics.analyzeBasicBlock(*I); - return Metrics.NumInsts; -} - /// UnswitchIfProfitable - We have found that we can unswitch currentLoop when /// LoopCond == Val to simplify the loop. If we decide that this is profitable, /// unswitch the loop, reprocess the pieces, then return true. @@ -427,24 +407,35 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val){ initLoopData(); Function *F = loopHeader->getParent(); + // If the condition is trivial, always unswitch. There is no code growth for + // this case. + if (!IsTrivialUnswitchCondition(LoopCond)) { + // Check to see if it would be profitable to unswitch current loop. - // Check to see if it would be profitable to unswitch current loop. - unsigned Cost = getLoopUnswitchCost(LoopCond); - - // Do not do non-trivial unswitch while optimizing for size. 
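Stepping back to the visitLoadInst change above: a load of undef, or of null in address space 0, can never execute, but instcombine cannot modify the CFG from inside a visitor, so it plants a store to null before the load and replaces the load's value with undef. The guard condition in isolation (a sketch of the check, not the transform):

// Mirrors "isa<UndefValue>(Op) ||
//          (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)".
bool loadIsProvablyUnreachable(bool PtrIsUndef, bool PtrIsNull,
                               unsigned AddrSpace) {
  return PtrIsUndef || (PtrIsNull && AddrSpace == 0);
}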
- if (Cost && OptimizeForSize) - return false; - if (Cost && !F->isDeclaration() && F->hasFnAttr(Attribute::OptimizeForSize)) - return false; + // Do not do non-trivial unswitch while optimizing for size. + if (OptimizeForSize || F->hasFnAttr(Attribute::OptimizeForSize)) + return false; - if (Cost > Threshold) { - // FIXME: this should estimate growth by the amount of code shared by the - // resultant unswitched loops. - // - DEBUG(errs() << "NOT unswitching loop %" - << currentLoop->getHeader()->getName() << ", cost too high: " - << currentLoop->getBlocks().size() << "\n"); - return false; + // FIXME: This is overly conservative because it does not take into + // consideration code simplification opportunities and code that can + // be shared by the resultant unswitched loops. + CodeMetrics Metrics; + for (Loop::block_iterator I = currentLoop->block_begin(), + E = currentLoop->block_end(); + I != E; ++I) + Metrics.analyzeBasicBlock(*I); + + // Limit the number of instructions to avoid causing significant code + // expansion, and the number of basic blocks, to avoid loops with + // large numbers of branches which cause loop unswitching to go crazy. + // This is a very ad-hoc heuristic. + if (Metrics.NumInsts > Threshold || + Metrics.NumBlocks * 5 > Threshold) { + DEBUG(errs() << "NOT unswitching loop %" + << currentLoop->getHeader()->getName() << ", cost too high: " + << currentLoop->getBlocks().size() << "\n"); + return false; + } } Constant *CondVal; diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index e6ffac2..af29f97 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -121,7 +121,6 @@ static bool isUnmovableInstruction(Instruction *I) { if (I->getOpcode() == Instruction::PHI || I->getOpcode() == Instruction::Alloca || I->getOpcode() == Instruction::Load || - I->getOpcode() == Instruction::Malloc || I->getOpcode() == Instruction::Invoke || (I->getOpcode() == Instruction::Call && !isa<DbgInfoIntrinsic>(I)) || diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index b5edf4e..b745097 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -1229,7 +1229,10 @@ CallOverdefined: TMRVI = TrackedMultipleRetVals.find(std::make_pair(F, 0)); if (TMRVI == TrackedMultipleRetVals.end()) goto CallOverdefined; - + + // Need to mark as overdefined, otherwise it stays undefined which + // creates extractvalue undef, <idx> + markOverdefined(I); // If we are tracking this callee, propagate the return values of the call // into this call site. We do this by walking all the uses. Single-index // ExtractValueInst uses can be tracked; anything more complicated is @@ -1271,7 +1274,6 @@ CallOverdefined: } } - void SCCPSolver::Solve() { // Process the work lists until they are empty! while (!BBWorkList.empty() || !InstWorkList.empty() || diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index 4931ab3..35907fd 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -425,14 +425,26 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, if (L) { if (IsLoopEntry) { - if (Loop *PredLoop = LI->getLoopFor(Preds[0])) { - // Add the new block to the nearest enclosing loop (and not an - // adjacent loop). 
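The UnswitchIfProfitable rewrite above folds the old cost function into the caller: trivial conditions always unswitch, -Os blocks everything else, and otherwise growth is capped both by instruction count and by block count (weighted 5x, since every unswitch can double the loop body). As a standalone predicate (a sketch of those thresholds, not the pass itself):

// Mirrors "Metrics.NumInsts > Threshold || Metrics.NumBlocks * 5 > Threshold".
bool profitableToUnswitch(bool TrivialCondition, bool OptimizeForSize,
                          unsigned NumInsts, unsigned NumBlocks,
                          unsigned Threshold) {
  if (TrivialCondition)
    return true;                     // no code growth for trivial conditions
  if (OptimizeForSize)
    return false;                    // never grow code under -Os
  return NumInsts <= Threshold &&    // cap raw size growth...
         NumBlocks * 5 <= Threshold; // ...and branch-heavy loops
}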
- while (PredLoop && !PredLoop->contains(BB)) - PredLoop = PredLoop->getParentLoop(); - if (PredLoop) - PredLoop->addBasicBlockToLoop(NewBB, LI->getBase()); - } + // Add the new block to the nearest enclosing loop (and not an + // adjacent loop). To find this, examine each of the predecessors and + // determine which loops enclose them, and select the most-nested loop + // which contains the loop containing the block being split. + Loop *InnermostPredLoop = 0; + for (unsigned i = 0; i != NumPreds; ++i) + if (Loop *PredLoop = LI->getLoopFor(Preds[i])) { + // Seek a loop which actually contains the block being split (to + // avoid adjacent loops). + while (PredLoop && !PredLoop->contains(BB)) + PredLoop = PredLoop->getParentLoop(); + // Select the most-nested of these loops which contains the block. + if (PredLoop && + PredLoop->contains(BB) && + (!InnermostPredLoop || + InnermostPredLoop->getLoopDepth() < PredLoop->getLoopDepth())) + InnermostPredLoop = PredLoop; + } + if (InnermostPredLoop) + InnermostPredLoop->addBasicBlockToLoop(NewBB, LI->getBase()); } else { L->addBasicBlockToLoop(NewBB, LI->getBase()); if (SplitMakesNewLoopHeader) diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 0d00d69..619c939 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -444,18 +444,15 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD, if (InlinedFunctionInfo.ContainsDynamicAllocas) { Module *M = Caller->getParent(); // Get the two intrinsics we care about. - Constant *StackSave, *StackRestore; - StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave); - StackRestore = Intrinsic::getDeclaration(M, Intrinsic::stackrestore); + Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave); + Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore); // If we are preserving the callgraph, add edges to the stacksave/restore // functions for the calls we insert. CallGraphNode *StackSaveCGN = 0, *StackRestoreCGN = 0, *CallerNode = 0; if (CG) { - // We know that StackSave/StackRestore are Function*'s, because they are - // intrinsics which must have the right types. 
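The SplitBlockPredecessors fix above is worth restating: the old code consulted only Preds[0], so a predecessor sitting in an adjacent (sibling) loop could attach NewBB to the wrong loop. The new code walks outward from each predecessor's loop until it reaches one that actually contains BB, then keeps the deepest such loop. The selection in miniature (hypothetical LoopNode type, not the LoopInfo API):

#include <set>
#include <vector>

// Minimal stand-in for a node in the loop nest.
struct LoopNode {
  const LoopNode *Parent = nullptr;
  unsigned Depth = 0;                      // 1 = outermost
  std::set<const void *> Blocks;           // blocks belonging to this loop
  bool contains(const void *BB) const { return Blocks.count(BB) != 0; }
};

// Pick the most-nested loop, across all predecessors, that encloses BB.
const LoopNode *
innermostEnclosingLoop(const std::vector<const LoopNode *> &PredLoops,
                       const void *BB) {
  const LoopNode *Innermost = nullptr;
  for (const LoopNode *L : PredLoops) {
    while (L && !L->contains(BB))          // skip adjacent (sibling) loops
      L = L->Parent;
    if (L && (!Innermost || Innermost->Depth < L->Depth))
      Innermost = L;                       // keep the deepest enclosing loop
  }
  return Innermost;
}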
-      StackSaveCGN = CG->getOrInsertFunction(cast<Function>(StackSave));
-      StackRestoreCGN = CG->getOrInsertFunction(cast<Function>(StackRestore));
+      StackSaveCGN = CG->getOrInsertFunction(StackSave);
+      StackRestoreCGN = CG->getOrInsertFunction(StackRestore);
       CallerNode = (*CG)[Caller];
     }
@@ -480,7 +477,8 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD,
     for (Function::iterator BB = FirstNewBlock, E = Caller->end();
          BB != E; ++BB)
       if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
-        CallInst::Create(StackRestore, SavedPtr, "", UI);
+        CallInst *CI = CallInst::Create(StackRestore, SavedPtr, "", UI);
+        if (CG) CallerNode->addCalledFunction(CI, StackRestoreCGN);
         ++NumStackRestores;
       }
   }
diff --git a/lib/Transforms/Utils/InstructionNamer.cpp b/lib/Transforms/Utils/InstructionNamer.cpp
index 1fa51a3..7f11acf 100644
--- a/lib/Transforms/Utils/InstructionNamer.cpp
+++ b/lib/Transforms/Utils/InstructionNamer.cpp
@@ -33,11 +33,11 @@ namespace {
       for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end();
            AI != AE; ++AI)
         if (!AI->hasName() && AI->getType() != Type::getVoidTy(F.getContext()))
-          AI->setName("tmp");
+          AI->setName("arg");
 
       for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
         if (!BB->hasName())
-          BB->setName("BB");
+          BB->setName("bb");
 
         for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
           if (!I->hasName() && I->getType() != Type::getVoidTy(F.getContext()))
diff --git a/lib/Transforms/Utils/LowerAllocations.cpp b/lib/Transforms/Utils/LowerAllocations.cpp
index f26d7c1..9c9113d 100644
--- a/lib/Transforms/Utils/LowerAllocations.cpp
+++ b/lib/Transforms/Utils/LowerAllocations.cpp
@@ -1,4 +1,4 @@
-//===- LowerAllocations.cpp - Reduce malloc & free insts to calls ---------===//
+//===- LowerAllocations.cpp - Reduce free insts to calls ------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -29,18 +29,15 @@ using namespace llvm;
 STATISTIC(NumLowered, "Number of allocations lowered");
 
 namespace {
-  /// LowerAllocations - Turn malloc and free instructions into @malloc and
-  /// @free calls.
+  /// LowerAllocations - Turn free instructions into @free calls.
   ///
   class VISIBILITY_HIDDEN LowerAllocations : public BasicBlockPass {
     Constant *FreeFunc;   // Functions in the module we are processing
                           // Initialized by doInitialization
-    bool LowerMallocArgToInteger;
   public:
     static char ID; // Pass ID, replacement for typeid
-    explicit LowerAllocations(bool LowerToInt = false)
-      : BasicBlockPass(&ID), FreeFunc(0),
-        LowerMallocArgToInteger(LowerToInt) {}
+    explicit LowerAllocations()
+      : BasicBlockPass(&ID), FreeFunc(0) {}
 
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.addRequired<TargetData>();
@@ -54,7 +51,7 @@ namespace {
     }
 
     /// doInitialization - For the lower allocations pass, this ensures that
-    /// a module contains a declaration for a malloc and a free function.
+    /// a module contains a declaration for a free function.
     ///
     bool doInitialization(Module &M);
 
@@ -76,13 +73,13 @@ X("lowerallocs", "Lower allocations from instructions to calls");
 
 // Publicly exposed interface to pass...
 const PassInfo *const llvm::LowerAllocationsID = &X;
 // createLowerAllocationsPass - Interface to this file...
-Pass *llvm::createLowerAllocationsPass(bool LowerMallocArgToInteger) { - return new LowerAllocations(LowerMallocArgToInteger); +Pass *llvm::createLowerAllocationsPass() { + return new LowerAllocations(); } // doInitialization - For the lower allocations pass, this ensures that a -// module contains a declaration for a malloc and a free function. +// module contains a declaration for a free function. // // This function is always successful. // @@ -102,25 +99,9 @@ bool LowerAllocations::runOnBasicBlock(BasicBlock &BB) { BasicBlock::InstListType &BBIL = BB.getInstList(); - const TargetData &TD = getAnalysis<TargetData>(); - const Type *IntPtrTy = TD.getIntPtrType(BB.getContext()); - - // Loop over all of the instructions, looking for malloc or free instructions + // Loop over all of the instructions, looking for free instructions for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) { - if (MallocInst *MI = dyn_cast<MallocInst>(I)) { - Value *ArraySize = MI->getOperand(0); - if (ArraySize->getType() != IntPtrTy) - ArraySize = CastInst::CreateIntegerCast(ArraySize, IntPtrTy, - false /*ZExt*/, "", I); - Value *MCast = CallInst::CreateMalloc(I, IntPtrTy, - MI->getAllocatedType(), ArraySize); - - // Replace all uses of the old malloc inst with the cast inst - MI->replaceAllUsesWith(MCast); - I = --BBIL.erase(I); // remove and delete the malloc instr... - Changed = true; - ++NumLowered; - } else if (FreeInst *FI = dyn_cast<FreeInst>(I)) { + if (FreeInst *FI = dyn_cast<FreeInst>(I)) { Value *PtrCast = new BitCastInst(FI->getOperand(0), Type::getInt8PtrTy(BB.getContext()), "", I); diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index 780ee26..8a07c35 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -48,7 +48,7 @@ void SSAUpdater::Initialize(Value *ProtoValue) { AV = new AvailableValsTy(); else getAvailableVals(AV).clear(); - + if (IPI == 0) IPI = new IncomingPredInfoTy(); else @@ -104,12 +104,12 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { // GetValueAtEndOfBlock to do our work. if (!getAvailableVals(AV).count(BB)) return GetValueAtEndOfBlock(BB); - + // Otherwise, we have the hard case. Get the live-in values for each // predecessor. SmallVector<std::pair<BasicBlock*, Value*>, 8> PredValues; Value *SingularValue = 0; - + // We can get our predecessor info by walking the pred_iterator list, but it // is relatively slow. If we already have PHI nodes in this block, walk one // of them to get the predecessor list instead. @@ -118,7 +118,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { BasicBlock *PredBB = SomePhi->getIncomingBlock(i); Value *PredVal = GetValueAtEndOfBlock(PredBB); PredValues.push_back(std::make_pair(PredBB, PredVal)); - + // Compute SingularValue. if (i == 0) SingularValue = PredVal; @@ -131,7 +131,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { BasicBlock *PredBB = *PI; Value *PredVal = GetValueAtEndOfBlock(PredBB); PredValues.push_back(std::make_pair(PredBB, PredVal)); - + // Compute SingularValue. if (isFirstPred) { SingularValue = PredVal; @@ -140,25 +140,25 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { SingularValue = 0; } } - + // If there are no predecessors, just return undef. if (PredValues.empty()) return UndefValue::get(PrototypeValue->getType()); - + // Otherwise, if all the merged values are the same, just use it. 
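GetValueInMiddleOfBlock above only inserts a PHI when the predecessors actually disagree: SingularValue holds the one incoming value seen so far and is reset the moment two predecessors differ. The same bookkeeping in miniature (a sketch with opaque value handles, not the LLVM types):

#include <utility>
#include <vector>

// Returns the lone incoming value, or 0 when a PHI is required.
// Pairs are (predecessor block, value live at the end of that block).
const void *singularIncomingValue(
    const std::vector<std::pair<const void *, const void *> > &PredValues) {
  const void *Singular = 0;
  for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
    if (i == 0)
      Singular = PredValues[i].second;  // first predecessor seeds the value
    else if (PredValues[i].second != Singular)
      return 0;                         // predecessors disagree: need a PHI
  }
  return Singular;                      // empty list: caller substitutes undef
}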
   if (SingularValue != 0)
     return SingularValue;
-  
+
   // Otherwise, we do need a PHI: insert one now.
   PHINode *InsertedPHI = PHINode::Create(PrototypeValue->getType(),
                                          PrototypeValue->getName(),
                                          &BB->front());
   InsertedPHI->reserveOperandSpace(PredValues.size());
-  
+
   // Fill in all the predecessors of the PHI.
   for (unsigned i = 0, e = PredValues.size(); i != e; ++i)
     InsertedPHI->addIncoming(PredValues[i].second, PredValues[i].first);
-  
+
   // See if the PHI node can be merged to a single value.  This can happen in
   // loop cases when we get a PHI of itself and one other value.
   if (Value *ConstVal = InsertedPHI->hasConstantValue()) {
@@ -168,7 +168,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
 
   // If the client wants to know about all new instructions, tell it.
   if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
-  
+
   DEBUG(errs() << "  Inserted PHI: " << *InsertedPHI << "\n");
   return InsertedPHI;
 }
@@ -177,11 +177,14 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
 /// which use their value in the corresponding predecessor.
 void SSAUpdater::RewriteUse(Use &U) {
   Instruction *User = cast<Instruction>(U.getUser());
-  BasicBlock *UseBB = User->getParent();
-  if (PHINode *UserPN = dyn_cast<PHINode>(User))
-    UseBB = UserPN->getIncomingBlock(U);
 
-  U.set(GetValueInMiddleOfBlock(UseBB));
+  Value *V;
+  if (PHINode *UserPN = dyn_cast<PHINode>(User))
+    V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U));
+  else
+    V = GetValueInMiddleOfBlock(User->getParent());
+
+  U.set(V);
 }
 
@@ -192,11 +195,11 @@ void SSAUpdater::RewriteUse(Use &U) {
 ///
 Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
   AvailableValsTy &AvailableVals = getAvailableVals(AV);
-  
+
   // Query AvailableVals by doing an insertion of null.
   std::pair<AvailableValsTy::iterator, bool> InsertRes =
     AvailableVals.insert(std::make_pair(BB, WeakVH()));
-  
+
   // Handle the case when the insertion fails because we have already seen BB.
   if (!InsertRes.second) {
     // If the insertion failed, there are two cases.  The first case is that the
@@ -204,7 +207,7 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
     // return the value.
     if (InsertRes.first->second != 0)
       return InsertRes.first->second;
-    
+
     // Otherwise, if the value we find is null, then the value is not
     // known but it is being computed elsewhere in our recursion.  This means
     // that we have a cycle.  Handle this by inserting a PHI node and returning
     // it.
       PHINode::Create(PrototypeValue->getType(), PrototypeValue->getName(),
                       &BB->front());
   }
-  
+
   // Okay, the value isn't in the map and we just inserted a null in the entry
   // to indicate that we're processing the block.  Since we have no idea what
   // value is in this block, we have to recurse through our predecessors.
@@ -225,13 +228,13 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
   // of the recursion, just use IncomingPredInfo as an explicit stack.
   IncomingPredInfoTy &IncomingPredInfo = getIncomingPredInfo(IPI);
   unsigned FirstPredInfoEntry = IncomingPredInfo.size();
-  
+
   // As we're walking the predecessors, keep track of whether they are all
   // producing the same value.  If so, this value will capture it, if not, it
   // will get reset to null.  We distinguish the no-predecessor case explicitly
   // below.
   TrackingVH<Value> SingularValue;
-  
+
   // We can get our predecessor info by walking the pred_iterator list, but it
   // is relatively slow.  If we already have PHI nodes in this block, walk one
   // of them to get the predecessor list instead.
@@ -240,7 +243,7 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
       BasicBlock *PredBB = SomePhi->getIncomingBlock(i);
       Value *PredVal = GetValueAtEndOfBlockInternal(PredBB);
       IncomingPredInfo.push_back(std::make_pair(PredBB, PredVal));
-      
+
       // Compute SingularValue.
       if (i == 0)
         SingularValue = PredVal;
@@ -253,7 +256,7 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
       BasicBlock *PredBB = *PI;
       Value *PredVal = GetValueAtEndOfBlockInternal(PredBB);
       IncomingPredInfo.push_back(std::make_pair(PredBB, PredVal));
-      
+
       // Compute SingularValue.
       if (isFirstPred) {
         SingularValue = PredVal;
@@ -262,19 +265,19 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
         SingularValue = 0;
       }
     }
-  
+
   // If there are no predecessors, then we must have found an unreachable block;
   // just return 'undef'.  Since there are no predecessors, InsertRes must not
   // be invalidated.
   if (IncomingPredInfo.size() == FirstPredInfoEntry)
     return InsertRes.first->second = UndefValue::get(PrototypeValue->getType());
-  
+
   /// Look up BB's entry in AvailableVals.  'InsertRes' may be invalidated.  If
   /// this block is involved in a loop, a no-entry PHI node will have been
   /// inserted as InsertedVal.  Otherwise, we'll still have the null we inserted
   /// above.
   TrackingVH<Value> &InsertedVal = AvailableVals[BB];
-  
+
   // If all the predecessor values are the same then we don't need to insert a
   // PHI.  This is the simple and common case.
   if (SingularValue) {
@@ -291,31 +294,31 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
     } else {
       InsertedVal = SingularValue;
     }
-    
+
     // Drop the entries we added in IncomingPredInfo to restore the stack.
     IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry,
                            IncomingPredInfo.end());
     return InsertedVal;
   }
-  
+
   // Otherwise, we do need a PHI: insert one now if we don't already have one.
   if (InsertedVal == 0)
     InsertedVal = PHINode::Create(PrototypeValue->getType(),
                                   PrototypeValue->getName(), &BB->front());
-  
+
   PHINode *InsertedPHI = cast<PHINode>(InsertedVal);
   InsertedPHI->reserveOperandSpace(IncomingPredInfo.size()-FirstPredInfoEntry);
-  
+
   // Fill in all the predecessors of the PHI.
   for (IncomingPredInfoTy::iterator I =
          IncomingPredInfo.begin()+FirstPredInfoEntry,
        E = IncomingPredInfo.end(); I != E; ++I)
     InsertedPHI->addIncoming(I->second, I->first);
-  
+
   // Drop the entries we added in IncomingPredInfo to restore the stack.
   IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry,
                          IncomingPredInfo.end());
-  
+
   // See if the PHI node can be merged to a single value.  This can happen in
   // loop cases when we get a PHI of itself and one other value.
   if (Value *ConstVal = InsertedPHI->hasConstantValue()) {
@@ -324,12 +327,10 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
     InsertedVal = ConstVal;
   } else {
     DEBUG(errs() << "  Inserted PHI: " << *InsertedPHI << "\n");
-    
+
     // If the client wants to know about all new instructions, tell it.
     if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
   }
-  
+
   return InsertedVal;
 }
-
-
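Finally, the RewriteUse change above makes the two lookup paths explicit: a use inside a PHI reads the value at the end of its incoming block, while any other use reads the value in the middle of its own block (the case that may insert a new PHI there). The dispatch in miniature (hypothetical types; the real SSAUpdater computes these values on demand rather than storing them):

#include <map>

struct Block { };
struct UseSite {
  const Block *UserBlock;     // block containing the using instruction
  const Block *IncomingBlock; // for PHI uses: the edge the value flows along
  bool UserIsPHI;
};

class ToyUpdater {
  std::map<const Block *, int> ValueAtEndOf;    // value live-out of a block
  std::map<const Block *, int> ValueInMiddleOf; // value available mid-block
public:
  // A PHI use takes the value at the end of the incoming block; every other
  // use takes the value in the middle of the block it lives in.
  int valueForUse(const UseSite &U) const {
    if (U.UserIsPHI)
      return ValueAtEndOf.at(U.IncomingBlock);
    return ValueInMiddleOf.at(U.UserBlock);
  }
};

The distinction matters because a PHI's operand is logically read on the incoming edge, not in the PHI's own block, so asking for the mid-block value there would merge the wrong definitions.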