9 files changed, 602 insertions, 251 deletions
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index 2bb6428..a612634 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -127,17 +127,8 @@ bool ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
 
   // Second check: make sure that all callers are direct callers.  We can't
   // transform functions that have indirect callers.
-  for (Value::use_iterator UI = F->use_begin(), E = F->use_end();
-       UI != E; ++UI) {
-    CallSite CS = CallSite::get(*UI);
-    if (!CS.getInstruction())       // "Taking the address" of the function
-      return false;
-
-    // Ensure that this call site is CALLING the function, not passing it as
-    // an argument.
-    if (!CS.isCallee(UI))
-      return false;
-  }
+  if (F->hasAddressTaken())
+    return false;
 
   // Check to see which arguments are promotable.  If an argument is promotable,
   // add it to ArgsToPromote.
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 666db7e..e480dad 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -175,15 +175,8 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
   if (Fn.isDeclaration() || !Fn.hasLocalLinkage()) return false;
 
   // Ensure that the function is only directly called.
-  for (Value::use_iterator I = Fn.use_begin(), E = Fn.use_end(); I != E; ++I) {
-    // If this use is anything other than a call site, give up.
-    CallSite CS = CallSite::get(*I);
-    Instruction *TheCall = CS.getInstruction();
-    if (!TheCall) return false;   // Not a direct call site?
-
-    // The addr of this function is passed to the call.
-    if (!CS.isCallee(I)) return false;
-  }
+  if (Fn.hasAddressTaken())
+    return false;
 
   // Okay, we know we can transform this function if safe.  Scan its body
   // looking for calls to llvm.vastart.
diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp
index db378b0..9c652b9 100644
--- a/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/lib/Transforms/IPO/GlobalDCE.cpp
@@ -47,7 +47,6 @@ namespace {
     void GlobalIsNeeded(GlobalValue *GV);
     void MarkUsedGlobalsAsNeeded(Constant *C);
 
-    bool SafeToDestroyConstant(Constant* C);
     bool RemoveUnusedGlobalValue(GlobalValue &GV);
   };
 }
@@ -211,17 +210,3 @@ bool GlobalDCE::RemoveUnusedGlobalValue(GlobalValue &GV) {
   GV.removeDeadConstantUsers();
   return GV.use_empty();
 }
-
-// SafeToDestroyConstant - It is safe to destroy a constant iff it is only used
-// by constants itself.  Note that constants cannot be cyclic, so this test is
-// pretty easy to implement recursively.
-//
-bool GlobalDCE::SafeToDestroyConstant(Constant *C) {
-  for (Value::use_iterator I = C->use_begin(), E = C->use_end(); I != E; ++I)
-    if (Constant *User = dyn_cast<Constant>(*I)) {
-      if (!SafeToDestroyConstant(User)) return false;
-    } else {
-      return false;
-    }
-  return true;
-}
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 5f12825..9a1b294 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -136,16 +136,16 @@ struct VISIBILITY_HIDDEN GlobalStatus {
 
 }
 
-/// ConstantIsDead - Return true if the specified constant is (transitively)
-/// dead.  The constant may be used by other constants (e.g. constant arrays and
-/// constant exprs) as long as they are dead, but it cannot be used by anything
-/// else.
-static bool ConstantIsDead(Constant *C) {
+// SafeToDestroyConstant - It is safe to destroy a constant iff it is only used
+// by constants itself.  Note that constants cannot be cyclic, so this test is
+// pretty easy to implement recursively.
+//
+static bool SafeToDestroyConstant(Constant *C) {
   if (isa<GlobalValue>(C)) return false;
 
   for (Value::use_iterator UI = C->use_begin(), E = C->use_end(); UI != E; ++UI)
     if (Constant *CU = dyn_cast<Constant>(*UI)) {
-      if (!ConstantIsDead(CU)) return false;
+      if (!SafeToDestroyConstant(CU)) return false;
     } else
       return false;
   return true;
@@ -233,7 +233,7 @@ static bool AnalyzeGlobal(Value *V, GlobalStatus &GS,
     } else if (Constant *C = dyn_cast<Constant>(*UI)) {
       GS.HasNonInstructionUser = true;
       // We might have a dead and dangling constant hanging off of here.
-      if (!ConstantIsDead(C))
+      if (!SafeToDestroyConstant(C))
         return true;
     } else {
       GS.HasNonInstructionUser = true;
@@ -338,7 +338,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) {
     } else if (Constant *C = dyn_cast<Constant>(U)) {
       // If we have a chain of dead constantexprs or other things dangling from
       // us, and if they are all dead, nuke them without remorse.
-      if (ConstantIsDead(C)) {
+      if (SafeToDestroyConstant(C)) {
         C->destroyConstant();
         // This could have invalidated UI, start over from scratch.
         CleanupConstantGlobalUsers(V, Init);
@@ -354,7 +354,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) {
 static bool isSafeSROAElementUse(Value *V) {
   // We might have a dead and dangling constant hanging off of here.
   if (Constant *C = dyn_cast<Constant>(V))
-    return ConstantIsDead(C);
+    return SafeToDestroyConstant(C);
   
   Instruction *I = dyn_cast<Instruction>(V);
   if (!I) return false;
@@ -1769,22 +1769,6 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
   return false;
 }
 
-/// OnlyCalledDirectly - Return true if the specified function is only called
-/// directly.  In other words, its address is never taken.
-static bool OnlyCalledDirectly(Function *F) {
-  for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){
-    Instruction *User = dyn_cast<Instruction>(*UI);
-    if (!User) return false;
-    if (!isa<CallInst>(User) && !isa<InvokeInst>(User)) return false;
-
-    // See if the function address is passed as an argument.
-    for (User::op_iterator i = User->op_begin() + 1, e = User->op_end();
-         i != e; ++i)
-      if (*i == F) return false;
-  }
-  return true;
-}
-
 /// ChangeCalleesToFastCall - Walk all of the direct calls of the specified
 /// function, changing them to FastCC.
 static void ChangeCalleesToFastCall(Function *F) {
@@ -1830,7 +1814,7 @@ bool GlobalOpt::OptimizeFunctions(Module &M) {
       ++NumFnDeleted;
     } else if (F->hasLocalLinkage()) {
       if (F->getCallingConv() == CallingConv::C && !F->isVarArg() &&
-          OnlyCalledDirectly(F)) {
+          !F->hasAddressTaken()) {
         // If this function has C calling conventions, is not a varargs
         // function, and is only called directly, promote it to use the Fast
         // calling convention.
@@ -1841,7 +1825,7 @@ bool GlobalOpt::OptimizeFunctions(Module &M) {
       }
 
       if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) &&
-          OnlyCalledDirectly(F)) {
+          !F->hasAddressTaken()) {
         // The function is not used by a trampoline intrinsic, so it is safe
         // to remove the 'nest' attribute.
         RemoveNestAttribute(F);
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index 17bc2d4..5693cc0 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -9,10 +9,6 @@
 //
 // This pass looks for equivalent functions that are mergable and folds them.
 //
-// A Function will not be analyzed if:
-// * it is overridable at runtime (except for weak linkage), or
-// * it is used by anything other than the callee parameter of a call/invoke
-//
 // A hash is computed from the function, based on its type and number of
 // basic blocks.
 //
@@ -24,8 +20,6 @@
 // When a match is found, the functions are folded. We can only fold two
 // functions when we know that the definition of one of them is not
 // overridable.
-// * fold a function marked internal by replacing all of its users.
-// * fold extern or weak functions by replacing them with a global alias
 //
 //===----------------------------------------------------------------------===//
 //
@@ -48,6 +42,7 @@
 #define DEBUG_TYPE "mergefunc"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Constants.h"
 #include "llvm/InlineAsm.h"
@@ -62,7 +57,6 @@
 using namespace llvm;
 
 STATISTIC(NumFunctionsMerged, "Number of functions merged");
-STATISTIC(NumMergeFails, "Number of identical function pairings not merged");
 
 namespace {
   struct VISIBILITY_HIDDEN MergeFunctions : public ModulePass {
@@ -81,16 +75,169 @@ ModulePass *llvm::createMergeFunctionsPass() {
   return new MergeFunctions();
 }
 
+// ===----------------------------------------------------------------------===
+// Comparison of functions
+// ===----------------------------------------------------------------------===
+
 static unsigned long hash(const Function *F) {
-  return F->size() ^ reinterpret_cast<unsigned long>(F->getType());
-  //return F->size() ^ F->arg_size() ^ F->getReturnType();
+  const FunctionType *FTy = F->getFunctionType();
+
+  FoldingSetNodeID ID;
+  ID.AddInteger(F->size());
+  ID.AddInteger(F->getCallingConv());
+  ID.AddBoolean(F->hasGC());
+  ID.AddBoolean(FTy->isVarArg());
+  ID.AddInteger(FTy->getReturnType()->getTypeID());
+  for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
+    ID.AddInteger(FTy->getParamType(i)->getTypeID());
+  return ID.ComputeHash();
+}
+
+/// IgnoreBitcasts - given a bitcast, returns the first non-bitcast found by
+/// walking the chain of cast operands. Otherwise, returns the argument.
+static Value* IgnoreBitcasts(Value *V) {
+  while (BitCastInst *BC = dyn_cast<BitCastInst>(V))
+    V = BC->getOperand(0);
+
+  return V;
+}
+
+/// isEquivalentType - any two pointers are equivalent. Otherwise, standard
+/// type equivalence rules apply.
+static bool isEquivalentType(const Type *Ty1, const Type *Ty2) {
+  if (Ty1 == Ty2)
+    return true;
+  if (Ty1->getTypeID() != Ty2->getTypeID())
+    return false;
+
+  switch(Ty1->getTypeID()) {
+  case Type::VoidTyID:
+  case Type::FloatTyID:
+  case Type::DoubleTyID:
+  case Type::X86_FP80TyID:
+  case Type::FP128TyID:
+  case Type::PPC_FP128TyID:
+  case Type::LabelTyID:
+  case Type::MetadataTyID:
+    return true;
+
+  case Type::IntegerTyID:
+  case Type::OpaqueTyID:
+    // Ty1 == Ty2 would have returned true earlier.
+    return false;
+
+  default:
+    assert(0 && "Unknown type!");
+    return false;
+
+  case Type::PointerTyID: {
+    const PointerType *PTy1 = cast<PointerType>(Ty1);
+    const PointerType *PTy2 = cast<PointerType>(Ty2);
+    return PTy1->getAddressSpace() == PTy2->getAddressSpace();
+  }
+
+  case Type::StructTyID: {
+    const StructType *STy1 = cast<StructType>(Ty1);
+    const StructType *STy2 = cast<StructType>(Ty2);
+    if (STy1->getNumElements() != STy2->getNumElements())
+      return false;
+
+    if (STy1->isPacked() != STy2->isPacked())
+      return false;
+
+    for (unsigned i = 0, e = STy1->getNumElements(); i != e; ++i) {
+      if (!isEquivalentType(STy1->getElementType(i), STy2->getElementType(i)))
+        return false;
+    }
+    return true;
+  }
+
+  case Type::FunctionTyID: {
+    const FunctionType *FTy1 = cast<FunctionType>(Ty1);
+    const FunctionType *FTy2 = cast<FunctionType>(Ty2);
+    if (FTy1->getNumParams() != FTy2->getNumParams() ||
+        FTy1->isVarArg() != FTy2->isVarArg())
+      return false;
+
+    if (!isEquivalentType(FTy1->getReturnType(), FTy2->getReturnType()))
+      return false;
+
+    for (unsigned i = 0, e = FTy1->getNumParams(); i != e; ++i) {
+      if (!isEquivalentType(FTy1->getParamType(i), FTy2->getParamType(i)))
+        return false;
+    }
+    return true;
+  }
+
+  case Type::ArrayTyID:
+  case Type::VectorTyID: {
+    const SequentialType *STy1 = cast<SequentialType>(Ty1);
+    const SequentialType *STy2 = cast<SequentialType>(Ty2);
+    return isEquivalentType(STy1->getElementType(), STy2->getElementType());
+  }
+  }
+}
+
+/// isEquivalentOperation - determine whether the two operations are the same
+/// except that pointer-to-A and pointer-to-B are equivalent. This should be
+/// kept in sync with Instruction::isSameOperationAs.
+static bool
+isEquivalentOperation(const Instruction *I1, const Instruction *I2) {
+  if (I1->getOpcode() != I2->getOpcode() ||
+      I1->getNumOperands() != I2->getNumOperands() ||
+      !isEquivalentType(I1->getType(), I2->getType()))
+    return false;
+
+  // We have two instructions of identical opcode and #operands.  Check to see
+  // if all operands are the same type
+  for (unsigned i = 0, e = I1->getNumOperands(); i != e; ++i)
+    if (!isEquivalentType(I1->getOperand(i)->getType(),
+                          I2->getOperand(i)->getType()))
+      return false;
+
+  // Check special state that is a part of some instructions.
+  if (const LoadInst *LI = dyn_cast<LoadInst>(I1))
+    return LI->isVolatile() == cast<LoadInst>(I2)->isVolatile() &&
+           LI->getAlignment() == cast<LoadInst>(I2)->getAlignment();
+  if (const StoreInst *SI = dyn_cast<StoreInst>(I1))
+    return SI->isVolatile() == cast<StoreInst>(I2)->isVolatile() &&
+           SI->getAlignment() == cast<StoreInst>(I2)->getAlignment();
+  if (const CmpInst *CI = dyn_cast<CmpInst>(I1))
+    return CI->getPredicate() == cast<CmpInst>(I2)->getPredicate();
+  if (const CallInst *CI = dyn_cast<CallInst>(I1))
+    return CI->isTailCall() == cast<CallInst>(I2)->isTailCall() &&
+           CI->getCallingConv() == cast<CallInst>(I2)->getCallingConv() &&
+           CI->getAttributes().getRawPointer() ==
+             cast<CallInst>(I2)->getAttributes().getRawPointer();
+  if (const InvokeInst *CI = dyn_cast<InvokeInst>(I1))
+    return CI->getCallingConv() == cast<InvokeInst>(I2)->getCallingConv() &&
+           CI->getAttributes().getRawPointer() ==
+             cast<InvokeInst>(I2)->getAttributes().getRawPointer();
+  if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(I1)) {
+    if (IVI->getNumIndices() != cast<InsertValueInst>(I2)->getNumIndices())
+      return false;
+    for (unsigned i = 0, e = IVI->getNumIndices(); i != e; ++i)
+      if (IVI->idx_begin()[i] != cast<InsertValueInst>(I2)->idx_begin()[i])
+        return false;
+    return true;
+  }
+  if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I1)) {
+    if (EVI->getNumIndices() != cast<ExtractValueInst>(I2)->getNumIndices())
+      return false;
+    for (unsigned i = 0, e = EVI->getNumIndices(); i != e; ++i)
+      if (EVI->idx_begin()[i] != cast<ExtractValueInst>(I2)->idx_begin()[i])
+        return false;
+    return true;
+  }
+
+  return true;
 }
 
 static bool compare(const Value *V, const Value *U) {
   assert(!isa<BasicBlock>(V) && !isa<BasicBlock>(U) &&
          "Must not compare basic blocks.");
 
-  assert(V->getType() == U->getType() &&
+  assert(isEquivalentType(V->getType(), U->getType()) &&
         "Two of the same operation have operands of different type.");
 
   // TODO: If the constant is an expression of F, we should accept that it's
@@ -117,20 +264,40 @@ static bool compare(const Value *V, const Value *U) {
 static bool equals(const BasicBlock *BB1, const BasicBlock *BB2,
                    DenseMap<const Value *, const Value *> &ValueMap,
                    DenseMap<const Value *, const Value *> &SpeculationMap) {
-  // Specutively add it anyways. If it's false, we'll notice a difference later, and
-  // this won't matter.
+  // Speculatively add it anyways. If it's false, we'll notice a difference
+  // later, and this won't matter.
   ValueMap[BB1] = BB2;
 
   BasicBlock::const_iterator FI = BB1->begin(), FE = BB1->end();
   BasicBlock::const_iterator GI = BB2->begin(), GE = BB2->end();
 
   do {
-    if (!FI->isSameOperationAs(const_cast<Instruction *>(&*GI)))
-      return false;
+    if (isa<BitCastInst>(FI)) {
+      ++FI;
+      continue;
+    }
+    if (isa<BitCastInst>(GI)) {
+      ++GI;
+      continue;
+    }
 
-    if (FI->getNumOperands() != GI->getNumOperands())
+    if (!isEquivalentOperation(FI, GI))
       return false;
 
+    if (isa<GetElementPtrInst>(FI)) {
+      const GetElementPtrInst *GEPF = cast<GetElementPtrInst>(FI);
+      const GetElementPtrInst *GEPG = cast<GetElementPtrInst>(GI);
+      if (GEPF->hasAllZeroIndices() && GEPG->hasAllZeroIndices()) {
+        // It's effectively a bitcast.
+        ++FI, ++GI;
+        continue;
+      }
+
+      // TODO: we only really care about the elements before the index
+      if (FI->getOperand(0)->getType() != GI->getOperand(0)->getType())
+        return false;
+    }
+
     if (ValueMap[FI] == GI) {
       ++FI, ++GI;
       continue;
@@ -140,8 +307,8 @@ static bool equals(const BasicBlock *BB1, const BasicBlock *BB2,
       return false;
 
     for (unsigned i = 0, e = FI->getNumOperands(); i != e; ++i) {
-      Value *OpF = FI->getOperand(i);
-      Value *OpG = GI->getOperand(i);
+      Value *OpF = IgnoreBitcasts(FI->getOperand(i));
+      Value *OpG = IgnoreBitcasts(GI->getOperand(i));
 
       if (ValueMap[OpF] == OpG)
         continue;
@@ -149,10 +316,8 @@ static bool equals(const BasicBlock *BB1, const BasicBlock *BB2,
       if (ValueMap[OpF] != NULL)
         return false;
 
-      assert(OpF->getType() == OpG->getType() &&
-             "Two of the same operation has operands of different type.");
-
-      if (OpF->getValueID() != OpG->getValueID())
+      if (OpF->getValueID() != OpG->getValueID() ||
+          !isEquivalentType(OpF->getType(), OpG->getType()))
         return false;
 
       if (isa<PHINode>(FI)) {
@@ -203,14 +368,15 @@ static bool equals(const Function *F, const Function *G) {
   if (F->hasSection() && F->getSection() != G->getSection())
     return false;
 
+  if (F->isVarArg() != G->isVarArg())
+    return false;
+
   // TODO: if it's internal and only used in direct calls, we could handle this
   // case too.
   if (F->getCallingConv() != G->getCallingConv())
     return false;
 
-  // TODO: We want to permit cases where two functions take T* and S* but
-  // only load or store them into T** and S**.
-  if (F->getType() != G->getType())
+  if (!isEquivalentType(F->getFunctionType(), G->getFunctionType()))
     return false;
 
   DenseMap<const Value *, const Value *> ValueMap;
@@ -237,89 +403,213 @@ static bool equals(const Function *F, const Function *G) {
   return true;
 }
 
-static bool fold(std::vector<Function *> &FnVec, unsigned i, unsigned j) {
-  if (FnVec[i]->mayBeOverridden() && !FnVec[j]->mayBeOverridden())
-    std::swap(FnVec[i], FnVec[j]);
-
-  Function *F = FnVec[i];
-  Function *G = FnVec[j];
+// ===----------------------------------------------------------------------===
+// Folding of functions
+// ===----------------------------------------------------------------------===
+
+// Cases:
+// * F is external strong, G is external strong:
+//   turn G into a thunk to F    (1)
+// * F is external strong, G is external weak:
+//   turn G into a thunk to F    (1)
+// * F is external weak, G is external weak:
+//   unfoldable
+// * F is external strong, G is internal:
+//   address of G taken:
+//     turn G into a thunk to F  (1)
+//   address of G not taken:
+//     make G an alias to F      (2)
+// * F is internal, G is external weak
+//   address of F is taken:
+//     turn G into a thunk to F  (1)
+//   address of F is not taken:
+//     make G an alias of F      (2)
+// * F is internal, G is internal:
+//   address of F and G are taken:
+//     turn G into a thunk to F  (1)
+//   address of G is not taken:
+//     make G an alias to F      (2)
+//
+// alias requires linkage == (external,local,weak) fallback to creating a thunk
+// external means 'externally visible' linkage != (internal,private)
+// internal means linkage == (internal,private)
+// weak means linkage mayBeOverridable
+// being external implies that the address is taken
+//
+// 1. turn G into a thunk to F
+// 2. make G an alias to F
+
+enum LinkageCategory {
+  ExternalStrong,
+  ExternalWeak,
+  Internal
+};
+
+static LinkageCategory categorize(const Function *F) {
+  switch (F->getLinkage()) {
+  case GlobalValue::InternalLinkage:
+  case GlobalValue::PrivateLinkage:
+    return Internal;
+
+  case GlobalValue::WeakAnyLinkage:
+  case GlobalValue::WeakODRLinkage:
+  case GlobalValue::ExternalWeakLinkage:
+    return ExternalWeak;
+
+  case GlobalValue::ExternalLinkage:
+  case GlobalValue::AvailableExternallyLinkage:
+  case GlobalValue::LinkOnceAnyLinkage:
+  case GlobalValue::LinkOnceODRLinkage:
+  case GlobalValue::AppendingLinkage:
+  case GlobalValue::DLLImportLinkage:
+  case GlobalValue::DLLExportLinkage:
+  case GlobalValue::GhostLinkage:
+  case GlobalValue::CommonLinkage:
+    return ExternalStrong;
+  }
 
-  if (!F->mayBeOverridden()) {
-    if (G->hasLocalLinkage()) {
-      F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
-      G->replaceAllUsesWith(F);
-      G->eraseFromParent();
-      ++NumFunctionsMerged;
-      return true;
-    }
+  assert(0 && "Unknown LinkageType.");
+  return ExternalWeak;
+}
 
-    if (G->hasExternalLinkage() || G->hasWeakLinkage()) {
-      GlobalAlias *GA = new GlobalAlias(G->getType(), G->getLinkage(), "",
-                                        F, G->getParent());
-      F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
-      GA->takeName(G);
-      GA->setVisibility(G->getVisibility());
-      G->replaceAllUsesWith(GA);
-      G->eraseFromParent();
-      ++NumFunctionsMerged;
-      return true;
+static void ThunkGToF(Function *F, Function *G) {
+  Function *NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "",
+                                    G->getParent());
+  BasicBlock *BB = BasicBlock::Create("", NewG);
+
+  std::vector<Value *> Args;
+  unsigned i = 0;
+  const FunctionType *FFTy = F->getFunctionType();
+  for (Function::arg_iterator AI = NewG->arg_begin(), AE = NewG->arg_end();
+       AI != AE; ++AI) {
+    if (FFTy->getParamType(i) == AI->getType())
+      Args.push_back(AI);
+    else {
+      Value *BCI = new BitCastInst(AI, FFTy->getParamType(i), "", BB);
+      Args.push_back(BCI);
     }
+    ++i;
   }
 
-  if (F->hasWeakLinkage() && G->hasWeakLinkage()) {
-    GlobalAlias *GA_F = new GlobalAlias(F->getType(), F->getLinkage(), "",
-                                        0, F->getParent());
-    GA_F->takeName(F);
-    GA_F->setVisibility(F->getVisibility());
-    F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
-    F->replaceAllUsesWith(GA_F);
-    F->setName("folded." + GA_F->getName());
-    F->setLinkage(GlobalValue::ExternalLinkage);
-    GA_F->setAliasee(F);
-
-    GlobalAlias *GA_G = new GlobalAlias(G->getType(), G->getLinkage(), "",
-                                        F, G->getParent());
-    GA_G->takeName(G);
-    GA_G->setVisibility(G->getVisibility());
-    G->replaceAllUsesWith(GA_G);
-    G->eraseFromParent();
-
-    ++NumFunctionsMerged;
-    return true;
+  CallInst *CI = CallInst::Create(F, Args.begin(), Args.end(), "", BB);
+  CI->setTailCall();
+  CI->setCallingConv(F->getCallingConv());
+  if (NewG->getReturnType() == Type::VoidTy) {
+    ReturnInst::Create(BB);
+  } else if (CI->getType() != NewG->getReturnType()) {
+    Value *BCI = new BitCastInst(CI, NewG->getReturnType(), "", BB);
+    ReturnInst::Create(BCI, BB);
+  } else {
+    ReturnInst::Create(CI, BB);
   }
 
-  DOUT << "Failed on " << F->getName() << " and " << G->getName() << "\n";
+  NewG->copyAttributesFrom(G);
+  NewG->takeName(G);
+  G->replaceAllUsesWith(NewG);
+  G->eraseFromParent();
 
-  ++NumMergeFails;
-  return false;
+  // TODO: look at direct callers to G and make them all direct callers to F.
 }
 
-static bool hasAddressTaken(User *U) {
-  for (User::use_iterator I = U->use_begin(), E = U->use_end(); I != E; ++I) {
-    User *Use = *I;
+static void AliasGToF(Function *F, Function *G) {
+  if (!G->hasExternalLinkage() && !G->hasLocalLinkage() && !G->hasWeakLinkage())
+    return ThunkGToF(F, G);
+
+  GlobalAlias *GA = new GlobalAlias(
+    G->getType(), G->getLinkage(), "",
+    ConstantExpr::getBitCast(F, G->getType()), G->getParent());
+  F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
+  GA->takeName(G);
+  GA->setVisibility(G->getVisibility());
+  G->replaceAllUsesWith(GA);
+  G->eraseFromParent();
+}
 
-    // 'call (bitcast @F to ...)' happens a lot.
-    while (isa<ConstantExpr>(Use) && Use->hasOneUse()) {
-      Use = *Use->use_begin();
-    }
+static bool fold(std::vector<Function *> &FnVec, unsigned i, unsigned j) {
+  Function *F = FnVec[i];
+  Function *G = FnVec[j];
 
-    if (isa<ConstantExpr>(Use)) {
-      if (hasAddressTaken(Use))
-        return true;
-    }
+  LinkageCategory catF = categorize(F);
+  LinkageCategory catG = categorize(G);
 
-    if (!isa<CallInst>(Use) && !isa<InvokeInst>(Use))
-      return true;
+  if (catF == ExternalWeak || (catF == Internal && catG == ExternalStrong)) {
+    std::swap(FnVec[i], FnVec[j]);
+    std::swap(F, G);
+    std::swap(catF, catG);
+  }
 
-    // Make sure we aren't passing U as a parameter to call instead of the
-    // callee.
-    if (CallSite(cast<Instruction>(Use)).hasArgument(U))
-      return true;
+  switch (catF) {
+    case ExternalStrong:
+      switch (catG) {
+        case ExternalStrong:
+        case ExternalWeak:
+          ThunkGToF(F, G);
+          break;
+        case Internal:
+          if (G->hasAddressTaken())
+            ThunkGToF(F, G);
+          else
+            AliasGToF(F, G);
+          break;
+      }
+      break;
+
+    case ExternalWeak: {
+      assert(catG == ExternalWeak);
+
+      // Make them both thunks to the same internal function.
+      F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
+      Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "",
+                                     F->getParent());
+      H->copyAttributesFrom(F);
+      H->takeName(F);
+      F->replaceAllUsesWith(H);
+
+      ThunkGToF(F, G);
+      ThunkGToF(F, H);
+
+      F->setLinkage(GlobalValue::InternalLinkage);
+    } break;
+
+    case Internal:
+      switch (catG) {
+        case ExternalStrong:
+          assert(0);
+          // fall-through
+        case ExternalWeak:
+	  if (F->hasAddressTaken())
+            ThunkGToF(F, G);
+          else
+            AliasGToF(F, G);
+	  break;
+        case Internal: {
+          bool addrTakenF = F->hasAddressTaken();
+          bool addrTakenG = G->hasAddressTaken();
+          if (!addrTakenF && addrTakenG) {
+            std::swap(FnVec[i], FnVec[j]);
+            std::swap(F, G);
+	    std::swap(addrTakenF, addrTakenG);
+	  }
+
+          if (addrTakenF && addrTakenG) {
+            ThunkGToF(F, G);
+          } else {
+            assert(!addrTakenG);
+            AliasGToF(F, G);
+          }
+	} break;
+      }
+      break;
   }
 
-  return false;
+  ++NumFunctionsMerged;
+  return true;
 }
 
+// ===----------------------------------------------------------------------===
+// Pass definition
+// ===----------------------------------------------------------------------===
+
 bool MergeFunctions::runOnModule(Module &M) {
   bool Changed = false;
 
@@ -329,25 +619,19 @@ bool MergeFunctions::runOnModule(Module &M) {
     if (F->isDeclaration() || F->isIntrinsic())
       continue;
 
-    if (!F->hasLocalLinkage() && !F->hasExternalLinkage() &&
-        !F->hasWeakLinkage())
-      continue;
-
-    if (hasAddressTaken(F))
-      continue;
-
     FnMap[hash(F)].push_back(F);
   }
 
-  // TODO: instead of running in a loop, we could also fold functions in callgraph
-  // order. Constructing the CFG probably isn't cheaper than just running in a loop.
+  // TODO: instead of running in a loop, we could also fold functions in
+  // callgraph order. Constructing the CFG probably isn't cheaper than just
+  // running in a loop, unless it happened to already be available.
 
   bool LocalChanged;
   do {
     LocalChanged = false;
+    DOUT << "size: " << FnMap.size() << "\n";
     for (std::map<unsigned long, std::vector<Function *> >::iterator
          I = FnMap.begin(), E = FnMap.end(); I != E; ++I) {
-      DOUT << "size: " << FnMap.size() << "\n";
       std::vector<Function *> &FnVec = I->second;
       DOUT << "hash (" << I->first << "): " << FnVec.size() << "\n";
 
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
new file mode 100644
index 0000000..b3a25540
--- /dev/null
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -0,0 +1,171 @@
+//===- PartialInlining.cpp - Inline parts of functions --------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs partial inlining, typically by inlining an if statement
+// that surrounds the body of the function.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "partialinlining"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/FunctionUtils.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/CFG.h"
+using namespace llvm;
+
+namespace {
+  struct VISIBILITY_HIDDEN PartialInliner : public ModulePass {
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const { }
+    static char ID; // Pass identification, replacement for typeid
+    PartialInliner() : ModulePass(&ID) {}
+    
+    bool runOnModule(Module& M);
+    
+  private:
+    Function* unswitchFunction(Function* F);
+  };
+}
+
+char PartialInliner::ID = 0;
+static RegisterPass<PartialInliner> X("partial-inliner", "Partial Inliner");
+
+ModulePass* llvm::createPartialInliningPass() { return new PartialInliner(); }
+
+Function* PartialInliner::unswitchFunction(Function* F) {
+  // First, verify that this function is an unswitching candidate...
+  BasicBlock* entryBlock = F->begin();
+  if (!isa<BranchInst>(entryBlock->getTerminator()))
+    return 0;
+  
+  BasicBlock* returnBlock = 0;
+  BasicBlock* nonReturnBlock = 0;
+  unsigned returnCount = 0;
+  for (succ_iterator SI = succ_begin(entryBlock), SE = succ_end(entryBlock);
+       SI != SE; ++SI)
+    if (isa<ReturnInst>((*SI)->getTerminator())) {
+      returnBlock = *SI;
+      returnCount++;
+    } else
+      nonReturnBlock = *SI;
+  
+  if (returnCount != 1)
+    return 0;
+  
+  // Clone the function, so that we can hack away on it.
+  DenseMap<const Value*, Value*> ValueMap;
+  Function* duplicateFunction = CloneFunction(F, ValueMap);
+  duplicateFunction->setLinkage(GlobalValue::InternalLinkage);
+  F->getParent()->getFunctionList().push_back(duplicateFunction);
+  BasicBlock* newEntryBlock = cast<BasicBlock>(ValueMap[entryBlock]);
+  BasicBlock* newReturnBlock = cast<BasicBlock>(ValueMap[returnBlock]);
+  BasicBlock* newNonReturnBlock = cast<BasicBlock>(ValueMap[nonReturnBlock]);
+  
+  // Go ahead and update all uses to the duplicate, so that we can just
+  // use the inliner functionality when we're done hacking.
+  F->replaceAllUsesWith(duplicateFunction);
+  
+  // Special hackery is needed with PHI nodes that have inputs from more than
+  // one extracted block.  For simplicity, just split the PHIs into a two-level
+  // sequence of PHIs, some of which will go in the extracted region, and some
+  // of which will go outside.
+  BasicBlock* preReturn = newReturnBlock;
+  newReturnBlock = newReturnBlock->splitBasicBlock(
+                                              newReturnBlock->getFirstNonPHI());
+  BasicBlock::iterator I = preReturn->begin();
+  BasicBlock::iterator Ins = newReturnBlock->begin();
+  while (I != preReturn->end()) {
+    PHINode* OldPhi = dyn_cast<PHINode>(I);
+    if (!OldPhi) break;
+    
+    PHINode* retPhi = PHINode::Create(OldPhi->getType(), "", Ins);
+    OldPhi->replaceAllUsesWith(retPhi);
+    Ins = newReturnBlock->getFirstNonPHI();
+    
+    retPhi->addIncoming(I, preReturn);
+    retPhi->addIncoming(OldPhi->getIncomingValueForBlock(newEntryBlock),
+                        newEntryBlock);
+    OldPhi->removeIncomingValue(newEntryBlock);
+    
+    ++I;
+  }
+  newEntryBlock->getTerminator()->replaceUsesOfWith(preReturn, newReturnBlock);
+  
+  // Gather up the blocks that we're going to extract.
+  std::vector<BasicBlock*> toExtract;
+  toExtract.push_back(newNonReturnBlock);
+  for (Function::iterator FI = duplicateFunction->begin(),
+       FE = duplicateFunction->end(); FI != FE; ++FI)
+    if (&*FI != newEntryBlock && &*FI != newReturnBlock &&
+        &*FI != newNonReturnBlock)
+      toExtract.push_back(FI);
+      
+  // The CodeExtractor needs a dominator tree.
+  DominatorTree DT;
+  DT.runOnFunction(*duplicateFunction);
+  
+  // Extract the body of the the if.
+  Function* extractedFunction = ExtractCodeRegion(DT, toExtract);
+  
+  // Inline the top-level if test into all callers.
+  std::vector<User*> Users(duplicateFunction->use_begin(), 
+                           duplicateFunction->use_end());
+  for (std::vector<User*>::iterator UI = Users.begin(), UE = Users.end();
+       UI != UE; ++UI)
+    if (CallInst* CI = dyn_cast<CallInst>(*UI))
+      InlineFunction(CI);
+    else if (InvokeInst* II = dyn_cast<InvokeInst>(*UI))
+      InlineFunction(II);
+  
+  // Ditch the duplicate, since we're done with it, and rewrite all remaining
+  // users (function pointers, etc.) back to the original function.
+  duplicateFunction->replaceAllUsesWith(F);
+  duplicateFunction->eraseFromParent();
+  
+  return extractedFunction;
+}
+
+bool PartialInliner::runOnModule(Module& M) {
+  std::vector<Function*> worklist;
+  worklist.reserve(M.size());
+  for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI)
+    if (!FI->use_empty() && !FI->isDeclaration())
+    worklist.push_back(&*FI);
+    
+  bool changed = false;
+  while (!worklist.empty()) {
+    Function* currFunc = worklist.back();
+    worklist.pop_back();
+  
+    if (currFunc->use_empty()) continue;
+    
+    bool recursive = false;
+    for (Function::use_iterator UI = currFunc->use_begin(),
+         UE = currFunc->use_end(); UI != UE; ++UI)
+      if (Instruction* I = dyn_cast<Instruction>(UI))
+        if (I->getParent()->getParent() == currFunc) {
+          recursive = true;
+          break;
+        }
+    if (recursive) continue;
+          
+    
+    if (Function* newFunc = unswitchFunction(currFunc)) {
+      worklist.push_back(newFunc);
+      changed = true;
+    }
+    
+  }
+  
+  return changed;
+}
+\ No newline at end of file
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 83503fd..38b1198 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -168,7 +168,7 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
 
   // Expand the code for the iteration count into the preheader of the loop.
   BasicBlock *Preheader = L->getLoopPreheader();
-  Value *ExitCnt = Rewriter.expandCodeFor(RHS, CmpIndVar->getType(),
+  Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(),
                                           Preheader->getTerminator());
 
   // Insert a new icmp_ne or icmp_eq instruction before the branch.
@@ -392,10 +392,31 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
   // in this loop, insert a canonical induction variable of the largest size.
   Value *IndVar = 0;
   if (NeedCannIV) {
+    // Check to see if the loop already has a canonical-looking induction
+    // variable. If one is present and it's wider than the planned canonical
+    // induction variable, temporarily remove it, so that the Rewriter
+    // doesn't attempt to reuse it.
+    PHINode *OldCannIV = L->getCanonicalInductionVariable();
+    if (OldCannIV) {
+      if (SE->getTypeSizeInBits(OldCannIV->getType()) >
+          SE->getTypeSizeInBits(LargestType))
+        OldCannIV->removeFromParent();
+      else
+        OldCannIV = 0;
+    }
+
     IndVar = Rewriter.getOrInsertCanonicalInductionVariable(L,LargestType);
+
     ++NumInserted;
     Changed = true;
     DOUT << "INDVARS: New CanIV: " << *IndVar;
+
+    // Now that the official induction variable is established, reinsert
+    // the old canonical-looking variable after it so that the IR remains
+    // consistent. It will be deleted as part of the dead-PHI deletion at
+    // the end of the pass.
+    if (OldCannIV)
+      OldCannIV->insertAfter(cast<Instruction>(IndVar));
   }
 
   // If we have a trip count expression, rewrite the loop's exit condition
@@ -459,8 +480,8 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType,
          E = List.end(); UI != E; ++UI) {
       SCEVHandle Offset = UI->getOffset();
       Value *Op = UI->getOperandValToReplace();
+      const Type *UseTy = Op->getType();
       Instruction *User = UI->getUser();
-      bool isSigned = UI->isSigned();
 
       // Compute the final addrec to expand into code.
       SCEVHandle AR = IU->getReplacementExpr(*UI);
@@ -471,7 +492,7 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType,
         // Expand loop-invariant values in the loop preheader. They will
         // be sunk to the exit block later, if possible.
         NewVal =
-          Rewriter.expandCodeFor(AR, LargestType,
+          Rewriter.expandCodeFor(AR, UseTy,
                                  L->getLoopPreheader()->getTerminator());
         Rewriter.setInsertionPoint(I);
         ++NumReplaced;
@@ -485,74 +506,6 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType,
         if (!Stride->isLoopInvariant(L))
           continue;
 
-        const Type *IVTy = Offset->getType();
-        const Type *UseTy = Op->getType();
-
-        // Promote the Offset and Stride up to the canonical induction
-        // variable's bit width.
-        SCEVHandle PromotedOffset = Offset;
-        SCEVHandle PromotedStride = Stride;
-        if (SE->getTypeSizeInBits(IVTy) != SE->getTypeSizeInBits(LargestType)) {
-          // It doesn't matter for correctness whether zero or sign extension
-          // is used here, since the value is truncated away below, but if the
-          // value is signed, sign extension is more likely to be folded.
-          if (isSigned) {
-            PromotedOffset = SE->getSignExtendExpr(PromotedOffset, LargestType);
-            PromotedStride = SE->getSignExtendExpr(PromotedStride, LargestType);
-          } else {
-            PromotedOffset = SE->getZeroExtendExpr(PromotedOffset, LargestType);
-            // If the stride is obviously negative, use sign extension to
-            // produce things like x-1 instead of x+255.
-            if (isa<SCEVConstant>(PromotedStride) &&
-                cast<SCEVConstant>(PromotedStride)
-                  ->getValue()->getValue().isNegative())
-              PromotedStride = SE->getSignExtendExpr(PromotedStride,
-                                                     LargestType);
-            else
-              PromotedStride = SE->getZeroExtendExpr(PromotedStride,
-                                                     LargestType);
-          }
-        }
-
-        // Create the SCEV representing the offset from the canonical
-        // induction variable, still in the canonical induction variable's
-        // type, so that all expanded arithmetic is done in the same type.
-        SCEVHandle NewAR = SE->getAddRecExpr(SE->getIntegerSCEV(0, LargestType),
-                                             PromotedStride, L);
-        // Add the PromotedOffset as a separate step, because it may not be
-        // loop-invariant.
-        NewAR = SE->getAddExpr(NewAR, PromotedOffset);
-
-        // Expand the addrec into instructions.
-        Value *V = Rewriter.expandCodeFor(NewAR);
-
-        // Insert an explicit cast if necessary to truncate the value
-        // down to the original stride type. This is done outside of
-        // SCEVExpander because in SCEV expressions, a truncate of an
-        // addrec is always folded.
-        if (LargestType != IVTy) {
-          if (SE->getTypeSizeInBits(IVTy) != SE->getTypeSizeInBits(LargestType))
-            NewAR = SE->getTruncateExpr(NewAR, IVTy);
-          if (Rewriter.isInsertedExpression(NewAR))
-            V = Rewriter.expandCodeFor(NewAR);
-          else {
-            V = Rewriter.InsertCastOfTo(CastInst::getCastOpcode(V, false,
-                                                                IVTy, false),
-                                        V, IVTy);
-            assert(!isa<SExtInst>(V) && !isa<ZExtInst>(V) &&
-                   "LargestType wasn't actually the largest type!");
-            // Force the rewriter to use this trunc whenever this addrec
-            // appears so that it doesn't insert new phi nodes or
-            // arithmetic in a different type.
-            Rewriter.addInsertedValue(V, NewAR);
-          }
-        }
-
-        DOUT << "INDVARS: Made offset-and-trunc IV for offset "
-             << *IVTy << " " << *Offset << ": ";
-        DEBUG(WriteAsOperand(*DOUT, V, false));
-        DOUT << "\n";
-
         // Now expand it into actual Instructions and patch it into place.
         NewVal = Rewriter.expandCodeFor(AR, UseTy);
       }
diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp
index 6d2ff0e..5465e4a 100644
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -2608,21 +2608,6 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
       else if (Op1I->getOperand(1) == Op0)         // X-(Y+X) == -Y
         return BinaryOperator::CreateFNeg(Op1I->getOperand(0), I.getName());
     }
-
-    if (Op1I->hasOneUse()) {
-      // Replace (x - (y - z)) with (x + (z - y)) if the (y - z) subexpression
-      // is not used by anyone else...
-      //
-      if (Op1I->getOpcode() == Instruction::FSub) {
-        // Swap the two operands of the subexpr...
-        Value *IIOp0 = Op1I->getOperand(0), *IIOp1 = Op1I->getOperand(1);
-        Op1I->setOperand(0, IIOp1);
-        Op1I->setOperand(1, IIOp0);
-
-        // Create the new top level fadd instruction...
-        return BinaryOperator::CreateFAdd(Op0, Op1);
-      }
-    }
   }
 
   return 0;
@@ -11824,7 +11809,8 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
   if (SI.isVolatile()) return 0;  // Don't hack volatile stores.
 
   // store X, null    -> turns into 'unreachable' in SimplifyCFG
-  if (isa<ConstantPointerNull>(Ptr)) {
+  if (isa<ConstantPointerNull>(Ptr) &&
+      cast<PointerType>(Ptr->getType())->getAddressSpace() == 0) {
     if (!isa<UndefValue>(Val)) {
       SI.setOperand(0, UndefValue::get(Val->getType()));
       if (Instruction *U = dyn_cast<Instruction>(Val))
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index b499279..5a85a04 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -125,13 +125,17 @@ static bool MarkAliveBlocks(BasicBlock *BB,
         }
       }
       
-      if (StoreInst *SI = dyn_cast<StoreInst>(BBI))
-        if (isa<ConstantPointerNull>(SI->getOperand(1)) ||
-            isa<UndefValue>(SI->getOperand(1))) {
+      if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
+        Value *Ptr = SI->getOperand(1);
+        
+        if (isa<UndefValue>(Ptr) ||
+            (isa<ConstantPointerNull>(Ptr) &&
+             cast<PointerType>(Ptr->getType())->getAddressSpace() == 0)) {
           ChangeToUnreachable(SI);
           Changed = true;
           break;
         }
+      }
     }
 
     // Turn invokes that call 'nounwind' functions into ordinary calls.