Diffstat (limited to 'contrib/llvm/lib/Transforms/Utils')
27 files changed, 14340 insertions, 0 deletions
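[Editor's note: the first diff below adds AddrModeMatcher.cpp, whose AddressingModeMatcher folds address computations into a target addressing mode of the form [BaseGV + BaseOffs + BaseReg + Scale*ScaledReg], committing each tentative change only if TargetLowering::isLegalAddressingMode accepts it. The standalone C++ sketch below is illustrative only and is not part of the diff; ToyAddrMode, isLegal, and matchScaled are invented stand-ins for llvm::ExtAddrMode, the target legality hook, and MatchScaledValue, showing the back-up/mutate/test/commit pattern the matcher uses.]

// Illustrative sketch only -- not part of the diff below.
#include <cstdint>
#include <iostream>

struct ToyAddrMode {
  int64_t     BaseOffs  = 0;        // constant displacement
  const char *BaseReg   = nullptr;  // stand-in for a Value*
  int64_t     Scale     = 0;
  const char *ScaledReg = nullptr;  // register multiplied by Scale
};

// Hypothetical target rule: accept [reg + imm + reg*{1,2,4,8}].
static bool isLegal(const ToyAddrMode &AM) {
  return AM.Scale == 0 || AM.Scale == 1 || AM.Scale == 2 ||
         AM.Scale == 4 || AM.Scale == 8;
}

// Mirrors the shape of MatchScaledValue: mutate a copy of the mode, keep it
// only if the target says the result is legal; otherwise the caller's mode
// is left untouched.
static bool matchScaled(ToyAddrMode &AM, const char *Reg, int64_t Scale) {
  ToyAddrMode Test = AM;            // back up the current mode
  Test.Scale += Scale;              // tentatively fold Reg*Scale
  Test.ScaledReg = Reg;
  if (!isLegal(Test))
    return false;                   // roll back by not committing
  AM = Test;                        // commit the legal mode
  return true;
}

int main() {
  ToyAddrMode AM;
  AM.BaseReg = "x";
  std::cout << matchScaled(AM, "i", 4) << '\n'; // 1: [x + i*4] accepted
  std::cout << matchScaled(AM, "i", 3) << '\n'; // 0: scale 7 rejected, AM kept
}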
diff --git a/contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp b/contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp new file mode 100644 index 0000000..8e5a1eb --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp @@ -0,0 +1,582 @@ +//===- AddrModeMatcher.cpp - Addressing mode matching facility --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements target addressing mode matcher class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/AddrModeMatcher.h" +#include "llvm/DerivedTypes.h" +#include "llvm/GlobalValue.h" +#include "llvm/Instruction.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/PatternMatch.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/CallSite.h" + +using namespace llvm; +using namespace llvm::PatternMatch; + +void ExtAddrMode::print(raw_ostream &OS) const { + bool NeedPlus = false; + OS << "["; + if (BaseGV) { + OS << (NeedPlus ? " + " : "") + << "GV:"; + WriteAsOperand(OS, BaseGV, /*PrintType=*/false); + NeedPlus = true; + } + + if (BaseOffs) + OS << (NeedPlus ? " + " : "") << BaseOffs, NeedPlus = true; + + if (BaseReg) { + OS << (NeedPlus ? " + " : "") + << "Base:"; + WriteAsOperand(OS, BaseReg, /*PrintType=*/false); + NeedPlus = true; + } + if (Scale) { + OS << (NeedPlus ? " + " : "") + << Scale << "*"; + WriteAsOperand(OS, ScaledReg, /*PrintType=*/false); + NeedPlus = true; + } + + OS << ']'; +} + +void ExtAddrMode::dump() const { + print(dbgs()); + dbgs() << '\n'; +} + + +/// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode. +/// Return true and update AddrMode if this addr mode is legal for the target, +/// false if not. +bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale, + unsigned Depth) { + // If Scale is 1, then this is the same as adding ScaleReg to the addressing + // mode. Just process that directly. + if (Scale == 1) + return MatchAddr(ScaleReg, Depth); + + // If the scale is 0, it takes nothing to add this. + if (Scale == 0) + return true; + + // If we already have a scale of this value, we can add to it, otherwise, we + // need an available scale field. + if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg) + return false; + + ExtAddrMode TestAddrMode = AddrMode; + + // Add scale to turn X*4+X*3 -> X*7. This could also do things like + // [A+B + A*7] -> [B+A*8]. + TestAddrMode.Scale += Scale; + TestAddrMode.ScaledReg = ScaleReg; + + // If the new address isn't legal, bail out. + if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) + return false; + + // It was legal, so commit it. + AddrMode = TestAddrMode; + + // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now + // to see if ScaleReg is actually X+C. If so, we can turn this into adding + // X*Scale + C*Scale to addr mode. + ConstantInt *CI = 0; Value *AddLHS = 0; + if (isa<Instruction>(ScaleReg) && // not a constant expr. 
+ match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) { + TestAddrMode.ScaledReg = AddLHS; + TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale; + + // If this addressing mode is legal, commit it and remember that we folded + // this instruction. + if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) { + AddrModeInsts.push_back(cast<Instruction>(ScaleReg)); + AddrMode = TestAddrMode; + return true; + } + } + + // Otherwise, not (x+c)*scale, just return what we have. + return true; +} + +/// MightBeFoldableInst - This is a little filter, which returns true if an +/// addressing computation involving I might be folded into a load/store +/// accessing it. This doesn't need to be perfect, but needs to accept at least +/// the set of instructions that MatchOperationAddr can. +static bool MightBeFoldableInst(Instruction *I) { + switch (I->getOpcode()) { + case Instruction::BitCast: + // Don't touch identity bitcasts. + if (I->getType() == I->getOperand(0)->getType()) + return false; + return I->getType()->isPointerTy() || I->getType()->isIntegerTy(); + case Instruction::PtrToInt: + // PtrToInt is always a noop, as we know that the int type is pointer sized. + return true; + case Instruction::IntToPtr: + // We know the input is intptr_t, so this is foldable. + return true; + case Instruction::Add: + return true; + case Instruction::Mul: + case Instruction::Shl: + // Can only handle X*C and X << C. + return isa<ConstantInt>(I->getOperand(1)); + case Instruction::GetElementPtr: + return true; + default: + return false; + } +} + + +/// MatchOperationAddr - Given an instruction or constant expr, see if we can +/// fold the operation into the addressing mode. If so, update the addressing +/// mode and return true, otherwise return false without modifying AddrMode. +bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, + unsigned Depth) { + // Avoid exponential behavior on extremely deep expression trees. + if (Depth >= 5) return false; + + switch (Opcode) { + case Instruction::PtrToInt: + // PtrToInt is always a noop, as we know that the int type is pointer sized. + return MatchAddr(AddrInst->getOperand(0), Depth); + case Instruction::IntToPtr: + // This inttoptr is a no-op if the integer type is pointer sized. + if (TLI.getValueType(AddrInst->getOperand(0)->getType()) == + TLI.getPointerTy()) + return MatchAddr(AddrInst->getOperand(0), Depth); + return false; + case Instruction::BitCast: + // BitCast is always a noop, and we can handle it as long as it is + // int->int or pointer->pointer (we don't want int<->fp or something). + if ((AddrInst->getOperand(0)->getType()->isPointerTy() || + AddrInst->getOperand(0)->getType()->isIntegerTy()) && + // Don't touch identity bitcasts. These were probably put here by LSR, + // and we don't want to mess around with them. Assume it knows what it + // is doing. + AddrInst->getOperand(0)->getType() != AddrInst->getType()) + return MatchAddr(AddrInst->getOperand(0), Depth); + return false; + case Instruction::Add: { + // Check to see if we can merge in the RHS then the LHS. If so, we win. + ExtAddrMode BackupAddrMode = AddrMode; + unsigned OldSize = AddrModeInsts.size(); + if (MatchAddr(AddrInst->getOperand(1), Depth+1) && + MatchAddr(AddrInst->getOperand(0), Depth+1)) + return true; + + // Restore the old addr mode info. + AddrMode = BackupAddrMode; + AddrModeInsts.resize(OldSize); + + // Otherwise this was over-aggressive. Try merging in the LHS then the RHS. 
+    if (MatchAddr(AddrInst->getOperand(0), Depth+1) &&
+        MatchAddr(AddrInst->getOperand(1), Depth+1))
+      return true;
+
+    // Otherwise we definitely can't merge the ADD in.
+    AddrMode = BackupAddrMode;
+    AddrModeInsts.resize(OldSize);
+    break;
+  }
+  //case Instruction::Or:
+  // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
+  //break;
+  case Instruction::Mul:
+  case Instruction::Shl: {
+    // Can only handle X*C and X << C.
+    ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
+    if (!RHS) return false;
+    int64_t Scale = RHS->getSExtValue();
+    if (Opcode == Instruction::Shl)
+      Scale = 1LL << Scale;
+
+    return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth);
+  }
+  case Instruction::GetElementPtr: {
+    // Scan the GEP.  We check to see if it contains constant offsets and at
+    // most one variable offset.
+    int VariableOperand = -1;
+    unsigned VariableScale = 0;
+
+    int64_t ConstantOffset = 0;
+    const TargetData *TD = TLI.getTargetData();
+    gep_type_iterator GTI = gep_type_begin(AddrInst);
+    for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
+      if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+        const StructLayout *SL = TD->getStructLayout(STy);
+        unsigned Idx =
+          cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
+        ConstantOffset += SL->getElementOffset(Idx);
+      } else {
+        uint64_t TypeSize = TD->getTypeAllocSize(GTI.getIndexedType());
+        if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
+          ConstantOffset += CI->getSExtValue()*TypeSize;
+        } else if (TypeSize) {  // Scales of zero don't do anything.
+          // We only allow one variable index at the moment.
+          if (VariableOperand != -1)
+            return false;
+
+          // Remember the variable index.
+          VariableOperand = i;
+          VariableScale = TypeSize;
+        }
+      }
+    }
+
+    // A common case is for the GEP to only do a constant offset.  In this case,
+    // just add it to the disp field and check validity.
+    if (VariableOperand == -1) {
+      AddrMode.BaseOffs += ConstantOffset;
+      if (ConstantOffset == 0 || TLI.isLegalAddressingMode(AddrMode, AccessTy)){
+        // Check to see if we can fold the base pointer in too.
+        if (MatchAddr(AddrInst->getOperand(0), Depth+1))
+          return true;
+      }
+      AddrMode.BaseOffs -= ConstantOffset;
+      return false;
+    }
+
+    // Save the valid addressing mode in case we can't match.
+    ExtAddrMode BackupAddrMode = AddrMode;
+    unsigned OldSize = AddrModeInsts.size();
+
+    // See if the scale and offset amount is valid for this target.
+    AddrMode.BaseOffs += ConstantOffset;
+
+    // Match the base operand of the GEP.
+    if (!MatchAddr(AddrInst->getOperand(0), Depth+1)) {
+      // If it couldn't be matched, just stuff the value in a register.
+      if (AddrMode.HasBaseReg) {
+        AddrMode = BackupAddrMode;
+        AddrModeInsts.resize(OldSize);
+        return false;
+      }
+      AddrMode.HasBaseReg = true;
+      AddrMode.BaseReg = AddrInst->getOperand(0);
+    }
+
+    // Match the remaining variable portion of the GEP.
+    if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
+                          Depth)) {
+      // If it couldn't be matched, try stuffing the base into a register
+      // instead of matching it, and retrying the match of the scale.
+      AddrMode = BackupAddrMode;
+      AddrModeInsts.resize(OldSize);
+      if (AddrMode.HasBaseReg)
+        return false;
+      AddrMode.HasBaseReg = true;
+      AddrMode.BaseReg = AddrInst->getOperand(0);
+      AddrMode.BaseOffs += ConstantOffset;
+      if (!MatchScaledValue(AddrInst->getOperand(VariableOperand),
+                            VariableScale, Depth)) {
+        // If even that didn't work, bail.
+        AddrMode = BackupAddrMode;
+        AddrModeInsts.resize(OldSize);
+        return false;
+      }
+    }
+
+    return true;
+  }
+  }
+  return false;
+}
+
+/// MatchAddr - If we can, try to add the value of 'Addr' into the current
+/// addressing mode.  If Addr can't be added to AddrMode this returns false and
+/// leaves AddrMode unmodified.  This assumes that Addr is either a pointer type
+/// or intptr_t for the target.
+///
+bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
+    // Fold in immediates if legal for the target.
+    AddrMode.BaseOffs += CI->getSExtValue();
+    if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+      return true;
+    AddrMode.BaseOffs -= CI->getSExtValue();
+  } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
+    // If this is a global variable, try to fold it into the addressing mode.
+    if (AddrMode.BaseGV == 0) {
+      AddrMode.BaseGV = GV;
+      if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+        return true;
+      AddrMode.BaseGV = 0;
+    }
+  } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
+    ExtAddrMode BackupAddrMode = AddrMode;
+    unsigned OldSize = AddrModeInsts.size();
+
+    // Check to see if it is possible to fold this operation.
+    if (MatchOperationAddr(I, I->getOpcode(), Depth)) {
+      // Okay, it's possible to fold this.  Check to see if it is actually
+      // *profitable* to do so.  We use a simple cost model to avoid increasing
+      // register pressure too much.
+      if (I->hasOneUse() ||
+          IsProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
+        AddrModeInsts.push_back(I);
+        return true;
+      }
+
+      // It isn't profitable to do this, roll back.
+      //cerr << "NOT FOLDING: " << *I;
+      AddrMode = BackupAddrMode;
+      AddrModeInsts.resize(OldSize);
+    }
+  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
+    if (MatchOperationAddr(CE, CE->getOpcode(), Depth))
+      return true;
+  } else if (isa<ConstantPointerNull>(Addr)) {
+    // Null pointer gets folded without affecting the addressing mode.
+    return true;
+  }
+
+  // Worst case, the target should support [reg] addressing modes. :)
+  if (!AddrMode.HasBaseReg) {
+    AddrMode.HasBaseReg = true;
+    AddrMode.BaseReg = Addr;
+    // Still check for legality in case the target supports [imm] but not [i+r].
+    if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+      return true;
+    AddrMode.HasBaseReg = false;
+    AddrMode.BaseReg = 0;
+  }
+
+  // If the base register is already taken, see if we can do [r+r].
+  if (AddrMode.Scale == 0) {
+    AddrMode.Scale = 1;
+    AddrMode.ScaledReg = Addr;
+    if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+      return true;
+    AddrMode.Scale = 0;
+    AddrMode.ScaledReg = 0;
+  }
+  // Couldn't match.
+  return false;
+}
+
+
+/// IsOperandAMemoryOperand - Check to see if all uses of OpVal by the specified
+/// inline asm call are due to memory operands.  If so, return true, otherwise
+/// return false.
+static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
+                                    const TargetLowering &TLI) {
+  TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(ImmutableCallSite(CI));
+  for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
+    TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
+
+    // Compute the constraint code and ConstraintType to use.
+    TLI.ComputeConstraintToUse(OpInfo, SDValue());
+
+    // If this asm operand is our Value*, and if it isn't an indirect memory
+    // operand, we can't fold it!
+    if (OpInfo.CallOperandVal == OpVal &&
+        (OpInfo.ConstraintType != TargetLowering::C_Memory ||
+         !OpInfo.isIndirect))
+      return false;
+  }
+
+  return true;
+}
+
+
+/// FindAllMemoryUses - Recursively walk all the uses of I until we find a
+/// memory use.  If we find an obviously non-foldable instruction, return true.
+/// Add the ultimately found memory instructions to MemoryUses.
+static bool FindAllMemoryUses(Instruction *I,
+                SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses,
+                              SmallPtrSet<Instruction*, 16> &ConsideredInsts,
+                              const TargetLowering &TLI) {
+  // If we already considered this instruction, we're done.
+  if (!ConsideredInsts.insert(I))
+    return false;
+
+  // If this is an obviously unfoldable instruction, bail out.
+  if (!MightBeFoldableInst(I))
+    return true;
+
+  // Loop over all the uses, recursively processing them.
+  for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+       UI != E; ++UI) {
+    User *U = *UI;
+
+    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+      MemoryUses.push_back(std::make_pair(LI, UI.getOperandNo()));
+      continue;
+    }
+
+    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+      unsigned opNo = UI.getOperandNo();
+      if (opNo == 0) return true; // Storing addr, not into addr.
+      MemoryUses.push_back(std::make_pair(SI, opNo));
+      continue;
+    }
+
+    if (CallInst *CI = dyn_cast<CallInst>(U)) {
+      InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
+      if (!IA) return true;
+
+      // If this is a memory operand, we're cool, otherwise bail out.
+      if (!IsOperandAMemoryOperand(CI, IA, I, TLI))
+        return true;
+      continue;
+    }
+
+    if (FindAllMemoryUses(cast<Instruction>(U), MemoryUses, ConsideredInsts,
+                          TLI))
+      return true;
+  }
+
+  return false;
+}
+
+
+/// ValueAlreadyLiveAtInst - Return true if Val is already known to be live at
+/// the use site that we're folding it into.  If so, there is no cost to
+/// include it in the addressing mode.  KnownLive1 and KnownLive2 are two values
+/// that we know are live at the instruction already.
+bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
+                                                   Value *KnownLive2) {
+  // If Val is either of the known-live values, we know it is live!
+  if (Val == 0 || Val == KnownLive1 || Val == KnownLive2)
+    return true;
+
+  // All values other than instructions and arguments (e.g. constants) are live.
+  if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true;
+
+  // If Val is a constant sized alloca in the entry block, it is live; this is
+  // true because it is just a reference to the stack/frame pointer, which is
+  // live for the whole function.
+  if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
+    if (AI->isStaticAlloca())
+      return true;
+
+  // Check to see if this value is already used in the memory instruction's
+  // block.  If so, it's already live into the block at the very least, so we
+  // can reasonably fold it.
+  BasicBlock *MemBB = MemoryInst->getParent();
+  for (Value::use_iterator UI = Val->use_begin(), E = Val->use_end();
+       UI != E; ++UI)
+    // We know that uses of arguments and instructions have to be instructions.
+    if (cast<Instruction>(*UI)->getParent() == MemBB)
+      return true;
+
+  return false;
+}
+
+
+
+/// IsProfitableToFoldIntoAddressingMode - It is possible for the addressing
+/// mode of the machine to fold the specified instruction into a load or store
+/// that ultimately uses it.  However, the specified instruction has multiple
+/// uses.  Given this, it may actually increase register pressure to fold it
+/// into the load.  For example, consider this code:
+///
+///     X = ...
+///     Y = X+1
+///     use(Y)   -> nonload/store
+///     Z = Y+1
+///     load Z
+///
+/// In this case, Y has multiple uses, and can be folded into the load of Z
+/// (yielding load [X+2]).  However, doing this will cause both "X" and "X+1" to
+/// be live at the use(Y) line.  If we don't fold Y into load Z, we use one
+/// fewer register.  Since Y can't be folded into "use(Y)" we don't increase the
+/// number of computations either.
+///
+/// Note that this (like most of CodeGenPrepare) is just a rough heuristic.  If
+/// X was live across 'load Z' for other reasons, we actually *would* want to
+/// fold the addressing mode in the Z case.  This would make Y die earlier.
+bool AddressingModeMatcher::
+IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
+                                     ExtAddrMode &AMAfter) {
+  if (IgnoreProfitability) return true;
+
+  // AMBefore is the addressing mode before this instruction was folded into it,
+  // and AMAfter is the addressing mode after the instruction was folded.  Get
+  // the set of registers referenced by AMAfter and subtract out those
+  // referenced by AMBefore: this is the set of values which folding in this
+  // address extends the lifetime of.
+  //
+  // Note that there are only two potential values being referenced here,
+  // BaseReg and ScaleReg (global addresses are always available, as are any
+  // folded immediates).
+  Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
+
+  // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
+  // lifetime wasn't extended by adding this instruction.
+  if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
+    BaseReg = 0;
+  if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
+    ScaledReg = 0;
+
+  // If folding this instruction (and its subexprs) didn't extend any live
+  // ranges, we're ok with it.
+  if (BaseReg == 0 && ScaledReg == 0)
+    return true;
+
+  // If all uses of this instruction are ultimately load/store/inlineasm's,
+  // check to see if their addressing modes will include this instruction.  If
+  // so, we can fold it into all uses, so it doesn't matter if it has multiple
+  // uses.
+  SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
+  SmallPtrSet<Instruction*, 16> ConsideredInsts;
+  if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI))
+    return false;  // Has a non-memory, non-foldable use!
+
+  // Now that we know that all uses of this instruction are part of a chain of
+  // computation involving only operations that could theoretically be folded
+  // into a memory use, loop over each of these uses and see if they could
+  // *actually* fold the instruction.
+  SmallVector<Instruction*, 32> MatchedAddrModeInsts;
+  for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
+    Instruction *User = MemoryUses[i].first;
+    unsigned OpNo = MemoryUses[i].second;
+
+    // Get the access type of this use.  If the use isn't a pointer, we don't
+    // know what it accesses.
+    Value *Address = User->getOperand(OpNo);
+    if (!Address->getType()->isPointerTy())
+      return false;
+    Type *AddressAccessTy =
+      cast<PointerType>(Address->getType())->getElementType();
+
+    // Do a match against the root of this address, ignoring profitability. This
+    // will tell us if the addressing mode for the memory operation will
+    // *actually* cover the shared instruction.
+ ExtAddrMode Result; + AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, AddressAccessTy, + MemoryInst, Result); + Matcher.IgnoreProfitability = true; + bool Success = Matcher.MatchAddr(Address, 0); + (void)Success; assert(Success && "Couldn't select *anything*?"); + + // If the match didn't cover I, then it won't be shared by it. + if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(), + I) == MatchedAddrModeInsts.end()) + return false; + + MatchedAddrModeInsts.clear(); + } + + return true; +} diff --git a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp new file mode 100644 index 0000000..a7f9efd --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -0,0 +1,686 @@ +//===-- BasicBlockUtils.cpp - BasicBlock Utilities -------------------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This family of functions perform manipulations on basic blocks, and +// instructions contained within basic blocks. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Constant.h" +#include "llvm/Type.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ValueHandle.h" +#include <algorithm> +using namespace llvm; + +/// DeleteDeadBlock - Delete the specified block, which must have no +/// predecessors. +void llvm::DeleteDeadBlock(BasicBlock *BB) { + assert((pred_begin(BB) == pred_end(BB) || + // Can delete self loop. + BB->getSinglePredecessor() == BB) && "Block is not dead!"); + TerminatorInst *BBTerm = BB->getTerminator(); + + // Loop through all of our successors and make sure they know that one + // of their predecessors is going away. + for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) + BBTerm->getSuccessor(i)->removePredecessor(BB); + + // Zap all the instructions in the block. + while (!BB->empty()) { + Instruction &I = BB->back(); + // If this instruction is used, replace uses with an arbitrary value. + // Because control flow can't get here, we don't care what we replace the + // value with. Note that since this block is unreachable, and all values + // contained within it must dominate their uses, that all uses will + // eventually be removed (they are themselves dead). + if (!I.use_empty()) + I.replaceAllUsesWith(UndefValue::get(I.getType())); + BB->getInstList().pop_back(); + } + + // Zap the block! + BB->eraseFromParent(); +} + +/// FoldSingleEntryPHINodes - We know that BB has one predecessor. If there are +/// any single-entry PHI nodes in it, fold them away. This handles the case +/// when all entries to the PHI nodes in a block are guaranteed equal, such as +/// when the block has exactly one predecessor. 
+void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, Pass *P) {
+  if (!isa<PHINode>(BB->begin())) return;
+
+  AliasAnalysis *AA = 0;
+  MemoryDependenceAnalysis *MemDep = 0;
+  if (P) {
+    AA = P->getAnalysisIfAvailable<AliasAnalysis>();
+    MemDep = P->getAnalysisIfAvailable<MemoryDependenceAnalysis>();
+  }
+
+  while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
+    if (PN->getIncomingValue(0) != PN)
+      PN->replaceAllUsesWith(PN->getIncomingValue(0));
+    else
+      PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
+
+    if (MemDep)
+      MemDep->removeInstruction(PN);  // Memdep updates AA itself.
+    else if (AA && isa<PointerType>(PN->getType()))
+      AA->deleteValue(PN);
+
+    PN->eraseFromParent();
+  }
+}
+
+
+/// DeleteDeadPHIs - Examine each PHI in the given block and delete it if it
+/// is dead. Also recursively delete any operands that become dead as
+/// a result. This includes tracing the def-use list from the PHI to see if
+/// it is ultimately unused or if it reaches an unused cycle.
+bool llvm::DeleteDeadPHIs(BasicBlock *BB) {
+  // Recursively deleting a PHI may cause multiple PHIs to be deleted
+  // or RAUW'd undef, so use an array of WeakVH for the PHIs to delete.
+  SmallVector<WeakVH, 8> PHIs;
+  for (BasicBlock::iterator I = BB->begin();
+       PHINode *PN = dyn_cast<PHINode>(I); ++I)
+    PHIs.push_back(PN);
+
+  bool Changed = false;
+  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
+    if (PHINode *PN = dyn_cast_or_null<PHINode>(PHIs[i].operator Value*()))
+      Changed |= RecursivelyDeleteDeadPHINode(PN);
+
+  return Changed;
+}
+
+/// MergeBlockIntoPredecessor - Attempts to merge a block into its predecessor,
+/// if possible.  The return value indicates success or failure.
+bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
+  // Don't merge away blocks whose address is taken.
+  if (BB->hasAddressTaken()) return false;
+
+  // Can't merge if there are multiple predecessors, or no predecessors.
+  BasicBlock *PredBB = BB->getUniquePredecessor();
+  if (!PredBB) return false;
+
+  // Don't break self-loops.
+  if (PredBB == BB) return false;
+  // Don't break invokes.
+  if (isa<InvokeInst>(PredBB->getTerminator())) return false;
+
+  succ_iterator SI(succ_begin(PredBB)), SE(succ_end(PredBB));
+  BasicBlock *OnlySucc = BB;
+  for (; SI != SE; ++SI)
+    if (*SI != OnlySucc) {
+      OnlySucc = 0;     // There are multiple distinct successors!
+      break;
+    }
+
+  // Can't merge if there are multiple successors.
+  if (!OnlySucc) return false;
+
+  // Can't merge if there is a PHI loop.
+  for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE; ++BI) {
+    if (PHINode *PN = dyn_cast<PHINode>(BI)) {
+      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+        if (PN->getIncomingValue(i) == PN)
+          return false;
+    } else
+      break;
+  }
+
+  // Begin by getting rid of unneeded PHIs.
+  if (isa<PHINode>(BB->front()))
+    FoldSingleEntryPHINodes(BB, P);
+
+  // Delete the unconditional branch from the predecessor...
+  PredBB->getInstList().pop_back();
+
+  // Make all PHI nodes that referred to BB now refer to Pred as their
+  // source...
+  BB->replaceAllUsesWith(PredBB);
+
+  // Move all definitions in the successor to the predecessor...
+  PredBB->getInstList().splice(PredBB->end(), BB->getInstList());
+
+  // If the predecessor block has no name, inherit BB's name.
+  if (!PredBB->hasName())
+    PredBB->takeName(BB);
+
+  // Finally, erase the old block and update dominator info.
+ if (P) { + if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) { + if (DomTreeNode *DTN = DT->getNode(BB)) { + DomTreeNode *PredDTN = DT->getNode(PredBB); + SmallVector<DomTreeNode*, 8> Children(DTN->begin(), DTN->end()); + for (SmallVector<DomTreeNode*, 8>::iterator DI = Children.begin(), + DE = Children.end(); DI != DE; ++DI) + DT->changeImmediateDominator(*DI, PredDTN); + + DT->eraseNode(BB); + } + + if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>()) + LI->removeBlock(BB); + + if (MemoryDependenceAnalysis *MD = + P->getAnalysisIfAvailable<MemoryDependenceAnalysis>()) + MD->invalidateCachedPredecessors(); + } + } + + BB->eraseFromParent(); + return true; +} + +/// ReplaceInstWithValue - Replace all uses of an instruction (specified by BI) +/// with a value, then remove and delete the original instruction. +/// +void llvm::ReplaceInstWithValue(BasicBlock::InstListType &BIL, + BasicBlock::iterator &BI, Value *V) { + Instruction &I = *BI; + // Replaces all of the uses of the instruction with uses of the value + I.replaceAllUsesWith(V); + + // Make sure to propagate a name if there is one already. + if (I.hasName() && !V->hasName()) + V->takeName(&I); + + // Delete the unnecessary instruction now... + BI = BIL.erase(BI); +} + + +/// ReplaceInstWithInst - Replace the instruction specified by BI with the +/// instruction specified by I. The original instruction is deleted and BI is +/// updated to point to the new instruction. +/// +void llvm::ReplaceInstWithInst(BasicBlock::InstListType &BIL, + BasicBlock::iterator &BI, Instruction *I) { + assert(I->getParent() == 0 && + "ReplaceInstWithInst: Instruction already inserted into basic block!"); + + // Insert the new instruction into the basic block... + BasicBlock::iterator New = BIL.insert(BI, I); + + // Replace all uses of the old instruction, and delete it. + ReplaceInstWithValue(BIL, BI, I); + + // Move BI back to point to the newly inserted instruction + BI = New; +} + +/// ReplaceInstWithInst - Replace the instruction specified by From with the +/// instruction specified by To. +/// +void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) { + BasicBlock::iterator BI(From); + ReplaceInstWithInst(From->getParent()->getInstList(), BI, To); +} + +/// GetSuccessorNumber - Search for the specified successor of basic block BB +/// and return its position in the terminator instruction's list of +/// successors. It is an error to call this with a block that is not a +/// successor. +unsigned llvm::GetSuccessorNumber(BasicBlock *BB, BasicBlock *Succ) { + TerminatorInst *Term = BB->getTerminator(); +#ifndef NDEBUG + unsigned e = Term->getNumSuccessors(); +#endif + for (unsigned i = 0; ; ++i) { + assert(i != e && "Didn't find edge?"); + if (Term->getSuccessor(i) == Succ) + return i; + } + return 0; +} + +/// SplitEdge - Split the edge connecting specified block. Pass P must +/// not be NULL. +BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) { + unsigned SuccNum = GetSuccessorNumber(BB, Succ); + + // If this is a critical edge, let SplitCriticalEdge do it. + TerminatorInst *LatchTerm = BB->getTerminator(); + if (SplitCriticalEdge(LatchTerm, SuccNum, P)) + return LatchTerm->getSuccessor(SuccNum); + + // If the edge isn't critical, then BB has a single successor or Succ has a + // single pred. Split the block. + BasicBlock::iterator SplitPoint; + if (BasicBlock *SP = Succ->getSinglePredecessor()) { + // If the successor only has a single pred, split the top of the successor + // block. 
+    assert(SP == BB && "CFG broken");
+    SP = NULL;
+    return SplitBlock(Succ, Succ->begin(), P);
+  }
+
+  // Otherwise, if BB has a single successor, split it at the bottom of the
+  // block.
+  assert(BB->getTerminator()->getNumSuccessors() == 1 &&
+         "Should have a single succ!");
+  return SplitBlock(BB, BB->getTerminator(), P);
+}
+
+/// SplitBlock - Split the specified block at the specified instruction -
+/// everything before SplitPt stays in Old and everything starting with SplitPt
+/// moves to a new block.  The two blocks are joined by an unconditional branch
+/// and the loop info is updated.
+///
+BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {
+  BasicBlock::iterator SplitIt = SplitPt;
+  while (isa<PHINode>(SplitIt) || isa<LandingPadInst>(SplitIt))
+    ++SplitIt;
+  BasicBlock *New = Old->splitBasicBlock(SplitIt, Old->getName()+".split");
+
+  // The new block lives in whichever loop the old one did. This preserves
+  // LCSSA as well, because we force the split point to be after any PHI nodes.
+  if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>())
+    if (Loop *L = LI->getLoopFor(Old))
+      L->addBasicBlockToLoop(New, LI->getBase());
+
+  if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) {
+    // Old dominates New. New node dominates all other nodes dominated by Old.
+    if (DomTreeNode *OldNode = DT->getNode(Old)) {
+      std::vector<DomTreeNode *> Children;
+      for (DomTreeNode::iterator I = OldNode->begin(), E = OldNode->end();
+           I != E; ++I)
+        Children.push_back(*I);
+
+      DomTreeNode *NewNode = DT->addNewBlock(New,Old);
+      for (std::vector<DomTreeNode *>::iterator I = Children.begin(),
+             E = Children.end(); I != E; ++I)
+        DT->changeImmediateDominator(*I, NewNode);
+    }
+  }
+
+  return New;
+}
+
+/// UpdateAnalysisInformation - Update DominatorTree, LoopInfo, and LCSSA
+/// analysis information.
+static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
+                                      ArrayRef<BasicBlock *> Preds,
+                                      Pass *P, bool &HasLoopExit) {
+  if (!P) return;
+
+  LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>();
+  Loop *L = LI ? LI->getLoopFor(OldBB) : 0;
+
+  // If we need to preserve loop analyses, collect some information about how
+  // this split will affect loops.
+  bool IsLoopEntry = !!L;
+  bool SplitMakesNewLoopHeader = false;
+  if (LI) {
+    bool PreserveLCSSA = P->mustPreserveAnalysisID(LCSSAID);
+    for (ArrayRef<BasicBlock*>::iterator
+           i = Preds.begin(), e = Preds.end(); i != e; ++i) {
+      BasicBlock *Pred = *i;
+
+      // If we need to preserve LCSSA, determine if any of the preds is a loop
+      // exit.
+      if (PreserveLCSSA)
+        if (Loop *PL = LI->getLoopFor(Pred))
+          if (!PL->contains(OldBB))
+            HasLoopExit = true;
+
+      // If we need to preserve LoopInfo, note whether any of the preds crosses
+      // an interesting loop boundary.
+      if (!L) continue;
+      if (L->contains(Pred))
+        IsLoopEntry = false;
+      else
+        SplitMakesNewLoopHeader = true;
+    }
+  }
+
+  // Update dominator tree if available.
+  DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>();
+  if (DT)
+    DT->splitBlock(NewBB);
+
+  if (!L) return;
+
+  if (IsLoopEntry) {
+    // Add the new block to the nearest enclosing loop (and not an adjacent
+    // loop). To find this, examine each of the predecessors and determine which
+    // loops enclose them, and select the most-nested loop which contains the
+    // loop containing the block being split.
+    Loop *InnermostPredLoop = 0;
+    for (ArrayRef<BasicBlock*>::iterator
+           i = Preds.begin(), e = Preds.end(); i != e; ++i) {
+      BasicBlock *Pred = *i;
+      if (Loop *PredLoop = LI->getLoopFor(Pred)) {
+        // Seek a loop which actually contains the block being split (to avoid
+        // adjacent loops).
+        while (PredLoop && !PredLoop->contains(OldBB))
+          PredLoop = PredLoop->getParentLoop();
+
+        // Select the most-nested of these loops which contains the block.
+        if (PredLoop && PredLoop->contains(OldBB) &&
+            (!InnermostPredLoop ||
+             InnermostPredLoop->getLoopDepth() < PredLoop->getLoopDepth()))
+          InnermostPredLoop = PredLoop;
+      }
+    }
+
+    if (InnermostPredLoop)
+      InnermostPredLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+  } else {
+    L->addBasicBlockToLoop(NewBB, LI->getBase());
+    if (SplitMakesNewLoopHeader)
+      L->moveToHeader(NewBB);
+  }
+}
+
+/// UpdatePHINodes - Update the PHI nodes in OrigBB to include the values coming
+/// from NewBB. This also updates AliasAnalysis, if available.
+static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
+                           ArrayRef<BasicBlock*> Preds, BranchInst *BI,
+                           Pass *P, bool HasLoopExit) {
+  // Create a new PHI node in NewBB for each PHI node in OrigBB.
+  AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0;
+  for (BasicBlock::iterator I = OrigBB->begin(); isa<PHINode>(I); ) {
+    PHINode *PN = cast<PHINode>(I++);
+
+    // Check to see if all of the values coming in are the same.  If so, we
+    // don't need to create a new PHI node, unless it's needed for LCSSA.
+    Value *InVal = 0;
+    if (!HasLoopExit) {
+      InVal = PN->getIncomingValueForBlock(Preds[0]);
+      for (unsigned i = 1, e = Preds.size(); i != e; ++i)
+        if (InVal != PN->getIncomingValueForBlock(Preds[i])) {
+          InVal = 0;
+          break;
+        }
+    }
+
+    if (InVal) {
+      // If all incoming values for the new PHI would be the same, just don't
+      // make a new PHI.  Instead, just remove the incoming values from the old
+      // PHI.
+      for (unsigned i = 0, e = Preds.size(); i != e; ++i)
+        PN->removeIncomingValue(Preds[i], false);
+    } else {
+      // If the values coming into the block are not the same, we need a PHI.
+      // Create the new PHI node, insert it into NewBB at the end of the block
+      PHINode *NewPHI =
+        PHINode::Create(PN->getType(), Preds.size(), PN->getName() + ".ph", BI);
+      if (AA) AA->copyValue(PN, NewPHI);
+
+      // Move all of the PHI values for 'Preds' to the new PHI.
+      for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
+        Value *V = PN->removeIncomingValue(Preds[i], false);
+        NewPHI->addIncoming(V, Preds[i]);
+      }
+
+      InVal = NewPHI;
+    }
+
+    // Add an incoming value to the PHI node in the loop for the preheader
+    // edge.
+    PN->addIncoming(InVal, NewBB);
+  }
+}
+
+/// SplitBlockPredecessors - This method transforms BB by introducing a new
+/// basic block into the function, and moving some of the predecessors of BB to
+/// be predecessors of the new block.  The new predecessors are indicated by the
+/// Preds array, which has NumPreds elements in it.  The new block is given a
+/// suffix of 'Suffix'.
+///
+/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree,
+/// LoopInfo, and LCSSA but no other analyses. In particular, it does not
+/// preserve LoopSimplify (because it's complicated to handle the case where one
+/// of the edges being split is an exit of a loop with other exits).
+///
+BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
+                                         BasicBlock *const *Preds,
+                                         unsigned NumPreds, const char *Suffix,
+                                         Pass *P) {
+  // Create new basic block, insert right before the original block.
+  BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix,
+                                         BB->getParent(), BB);
+
+  // The new block unconditionally branches to the old block.
+  BranchInst *BI = BranchInst::Create(BB, NewBB);
+
+  // Move the edges from Preds to point to NewBB instead of BB.
+  for (unsigned i = 0; i != NumPreds; ++i) {
+    // This is slightly more strict than necessary; the minimum requirement
+    // is that there be no more than one indirectbr branching to BB. And
+    // all BlockAddress uses would need to be updated.
+    assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) &&
+           "Cannot split an edge from an IndirectBrInst");
+    Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB);
+  }
+
+  // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI
+  // node becomes an incoming value for BB's phi node.  However, if the Preds
+  // list is empty, we need to insert dummy entries into the PHI nodes in BB to
+  // account for the newly created predecessor.
+  if (NumPreds == 0) {
+    // Insert dummy values as the incoming value.
+    for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I)
+      cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB);
+    return NewBB;
+  }
+
+  // Update DominatorTree, LoopInfo, and LCSSA analysis information.
+  bool HasLoopExit = false;
+  UpdateAnalysisInformation(BB, NewBB, ArrayRef<BasicBlock*>(Preds, NumPreds),
+                            P, HasLoopExit);
+
+  // Update the PHI nodes in BB with the values coming from NewBB.
+  UpdatePHINodes(BB, NewBB, ArrayRef<BasicBlock*>(Preds, NumPreds), BI,
+                 P, HasLoopExit);
+  return NewBB;
+}
+
+/// SplitLandingPadPredecessors - This method transforms the landing pad,
+/// OrigBB, by introducing two new basic blocks into the function. One of those
+/// new basic blocks gets the predecessors listed in Preds. The other basic
+/// block gets the remaining predecessors of OrigBB. The landingpad instruction
+/// in OrigBB is cloned into both of the new basic blocks. The new blocks are
+/// given the suffixes 'Suffix1' and 'Suffix2', and are returned in the NewBBs
+/// vector.
+///
+/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree,
+/// DominanceFrontier, LoopInfo, and LCSSA but no other analyses. In particular,
+/// it does not preserve LoopSimplify (because it's complicated to handle the
+/// case where one of the edges being split is an exit of a loop with other
+/// exits).
+///
+void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
+                                       ArrayRef<BasicBlock*> Preds,
+                                       const char *Suffix1, const char *Suffix2,
+                                       Pass *P,
+                                       SmallVectorImpl<BasicBlock*> &NewBBs) {
+  assert(OrigBB->isLandingPad() && "Trying to split a non-landing pad!");
+
+  // Create a new basic block for OrigBB's predecessors listed in Preds. Insert
+  // it right before the original block.
+  BasicBlock *NewBB1 = BasicBlock::Create(OrigBB->getContext(),
+                                          OrigBB->getName() + Suffix1,
+                                          OrigBB->getParent(), OrigBB);
+  NewBBs.push_back(NewBB1);
+
+  // The new block unconditionally branches to the old block.
+  BranchInst *BI1 = BranchInst::Create(OrigBB, NewBB1);
+
+  // Move the edges from Preds to point to NewBB1 instead of OrigBB.
+  for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
+    // This is slightly more strict than necessary; the minimum requirement
+    // is that there be no more than one indirectbr branching to BB. And
+    // all BlockAddress uses would need to be updated.
+    assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) &&
+           "Cannot split an edge from an IndirectBrInst");
+    Preds[i]->getTerminator()->replaceUsesOfWith(OrigBB, NewBB1);
+  }
+
+  // Update DominatorTree, LoopInfo, and LCSSA analysis information.
+  bool HasLoopExit = false;
+  UpdateAnalysisInformation(OrigBB, NewBB1, Preds, P, HasLoopExit);
+
+  // Update the PHI nodes in OrigBB with the values coming from NewBB1.
+  UpdatePHINodes(OrigBB, NewBB1, Preds, BI1, P, HasLoopExit);
+
+  // Move the remaining edges from OrigBB to point to NewBB2.
+  SmallVector<BasicBlock*, 8> NewBB2Preds;
+  for (pred_iterator i = pred_begin(OrigBB), e = pred_end(OrigBB);
+       i != e; ) {
+    BasicBlock *Pred = *i++;
+    if (Pred == NewBB1) continue;
+    assert(!isa<IndirectBrInst>(Pred->getTerminator()) &&
+           "Cannot split an edge from an IndirectBrInst");
+    NewBB2Preds.push_back(Pred);
+    e = pred_end(OrigBB);
+  }
+
+  BasicBlock *NewBB2 = 0;
+  if (!NewBB2Preds.empty()) {
+    // Create another basic block for the rest of OrigBB's predecessors.
+    NewBB2 = BasicBlock::Create(OrigBB->getContext(),
+                                OrigBB->getName() + Suffix2,
+                                OrigBB->getParent(), OrigBB);
+    NewBBs.push_back(NewBB2);
+
+    // The new block unconditionally branches to the old block.
+    BranchInst *BI2 = BranchInst::Create(OrigBB, NewBB2);
+
+    // Move the remaining edges from OrigBB to point to NewBB2.
+    for (SmallVectorImpl<BasicBlock*>::iterator
+           i = NewBB2Preds.begin(), e = NewBB2Preds.end(); i != e; ++i)
+      (*i)->getTerminator()->replaceUsesOfWith(OrigBB, NewBB2);
+
+    // Update DominatorTree, LoopInfo, and LCSSA analysis information.
+    HasLoopExit = false;
+    UpdateAnalysisInformation(OrigBB, NewBB2, NewBB2Preds, P, HasLoopExit);
+
+    // Update the PHI nodes in OrigBB with the values coming from NewBB2.
+    UpdatePHINodes(OrigBB, NewBB2, NewBB2Preds, BI2, P, HasLoopExit);
+  }
+
+  LandingPadInst *LPad = OrigBB->getLandingPadInst();
+  Instruction *Clone1 = LPad->clone();
+  Clone1->setName(Twine("lpad") + Suffix1);
+  NewBB1->getInstList().insert(NewBB1->getFirstInsertionPt(), Clone1);
+
+  if (NewBB2) {
+    Instruction *Clone2 = LPad->clone();
+    Clone2->setName(Twine("lpad") + Suffix2);
+    NewBB2->getInstList().insert(NewBB2->getFirstInsertionPt(), Clone2);
+
+    // Create a PHI node for the two cloned landingpad instructions.
+    PHINode *PN = PHINode::Create(LPad->getType(), 2, "lpad.phi", LPad);
+    PN->addIncoming(Clone1, NewBB1);
+    PN->addIncoming(Clone2, NewBB2);
+    LPad->replaceAllUsesWith(PN);
+    LPad->eraseFromParent();
+  } else {
+    // There is no second clone. Just replace the landing pad with the first
+    // clone.
+    LPad->replaceAllUsesWith(Clone1);
+    LPad->eraseFromParent();
+  }
+}
+
+/// FindFunctionBackedges - Analyze the specified function to find all of the
+/// loop backedges in the function and return them.  This is a relatively cheap
+/// (compared to computing dominators and loop info) analysis.
+///
+/// The output is added to Result, as pairs of <from,to> edge info.
+void llvm::FindFunctionBackedges(const Function &F, + SmallVectorImpl<std::pair<const BasicBlock*,const BasicBlock*> > &Result) { + const BasicBlock *BB = &F.getEntryBlock(); + if (succ_begin(BB) == succ_end(BB)) + return; + + SmallPtrSet<const BasicBlock*, 8> Visited; + SmallVector<std::pair<const BasicBlock*, succ_const_iterator>, 8> VisitStack; + SmallPtrSet<const BasicBlock*, 8> InStack; + + Visited.insert(BB); + VisitStack.push_back(std::make_pair(BB, succ_begin(BB))); + InStack.insert(BB); + do { + std::pair<const BasicBlock*, succ_const_iterator> &Top = VisitStack.back(); + const BasicBlock *ParentBB = Top.first; + succ_const_iterator &I = Top.second; + + bool FoundNew = false; + while (I != succ_end(ParentBB)) { + BB = *I++; + if (Visited.insert(BB)) { + FoundNew = true; + break; + } + // Successor is in VisitStack, it's a back edge. + if (InStack.count(BB)) + Result.push_back(std::make_pair(ParentBB, BB)); + } + + if (FoundNew) { + // Go down one level if there is a unvisited successor. + InStack.insert(BB); + VisitStack.push_back(std::make_pair(BB, succ_begin(BB))); + } else { + // Go up one level. + InStack.erase(VisitStack.pop_back_val().first); + } + } while (!VisitStack.empty()); +} + +/// FoldReturnIntoUncondBranch - This method duplicates the specified return +/// instruction into a predecessor which ends in an unconditional branch. If +/// the return instruction returns a value defined by a PHI, propagate the +/// right value into the return. It returns the new return instruction in the +/// predecessor. +ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, + BasicBlock *Pred) { + Instruction *UncondBranch = Pred->getTerminator(); + // Clone the return and add it to the end of the predecessor. + Instruction *NewRet = RI->clone(); + Pred->getInstList().push_back(NewRet); + + // If the return instruction returns a value, and if the value was a + // PHI node in "BB", propagate the right value into the return. + for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end(); + i != e; ++i) + if (PHINode *PN = dyn_cast<PHINode>(*i)) + if (PN->getParent() == BB) + *i = PN->getIncomingValueForBlock(Pred); + + // Update any PHI nodes in the returning block to realize that we no + // longer branch to them. + BB->removePredecessor(Pred); + UncondBranch->eraseFromParent(); + return cast<ReturnInst>(NewRet); +} + +/// GetFirstDebugLocInBasicBlock - Return first valid DebugLoc entry in a +/// given basic block. +DebugLoc llvm::GetFirstDebugLocInBasicBlock(const BasicBlock *BB) { + if (const Instruction *I = BB->getFirstNonPHI()) + return I->getDebugLoc(); + // Scanning entire block may be too expensive, if the first instruction + // does not have valid location info. + return DebugLoc(); +} diff --git a/contrib/llvm/lib/Transforms/Utils/BasicInliner.cpp b/contrib/llvm/lib/Transforms/Utils/BasicInliner.cpp new file mode 100644 index 0000000..23a30cc5 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/BasicInliner.cpp @@ -0,0 +1,182 @@ +//===- BasicInliner.cpp - Basic function level inliner --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a simple function based inliner that does not use +// call graph information. 
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "basicinliner"
+#include "llvm/Module.h"
+#include "llvm/Function.h"
+#include "llvm/Transforms/Utils/BasicInliner.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include <vector>
+
+using namespace llvm;
+
+static cl::opt<unsigned>
+BasicInlineThreshold("basic-inline-threshold", cl::Hidden, cl::init(200),
+  cl::desc("Control the amount of basic inlining to perform (default = 200)"));
+
+namespace llvm {
+
+  /// BasicInlinerImpl - BasicInliner implementation class. This hides
+  /// container info, used by basic inliner, from public interface.
+  struct BasicInlinerImpl {
+
+    BasicInlinerImpl(const BasicInlinerImpl&); // DO NOT IMPLEMENT
+    void operator=(const BasicInlinerImpl&);   // DO NOT IMPLEMENT
+  public:
+    BasicInlinerImpl(TargetData *T) : TD(T) {}
+
+    /// addFunction - Add function into the list of functions to process.
+    /// All functions must be inserted using this interface before invoking
+    /// inlineFunctions().
+    void addFunction(Function *F) {
+      Functions.push_back(F);
+    }
+
+    /// neverInlineFunction - Sometimes a function is never to be inlined
+    /// for one reason or another.
+    void neverInlineFunction(Function *F) {
+      NeverInline.insert(F);
+    }
+
+    /// inlineFunctions - Walk all call sites in all functions supplied by
+    /// client. Inline as many call sites as possible. Delete completely
+    /// inlined functions.
+    void inlineFunctions();
+
+  private:
+    TargetData *TD;
+    std::vector<Function *> Functions;
+    SmallPtrSet<const Function *, 16> NeverInline;
+    SmallPtrSet<Function *, 8> DeadFunctions;
+    InlineCostAnalyzer CA;
+  };
+
+/// inlineFunctions - Walk all call sites in all functions supplied by
+/// client. Inline as many call sites as possible. Delete completely
+/// inlined functions.
+void BasicInlinerImpl::inlineFunctions() {
+
+  // Scan through and identify all call sites ahead of time so that we only
+  // inline call sites in the original functions, not call sites that result
+  // from inlining other functions.
+  std::vector<CallSite> CallSites;
+
+  for (std::vector<Function *>::iterator FI = Functions.begin(),
+         FE = Functions.end(); FI != FE; ++FI) {
+    Function *F = *FI;
+    for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+      for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
+        CallSite CS(cast<Value>(I));
+        if (CS && CS.getCalledFunction()
+            && !CS.getCalledFunction()->isDeclaration())
+          CallSites.push_back(CS);
+      }
+  }
+
+  DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n");
+
+  // Inline call sites.
+  bool Changed = false;
+  do {
+    Changed = false;
+    for (unsigned index = 0; index != CallSites.size() && !CallSites.empty();
+         ++index) {
+      CallSite CS = CallSites[index];
+      if (Function *Callee = CS.getCalledFunction()) {
+
+        // Eliminate calls that are never inlinable.
+        if (Callee->isDeclaration() ||
+            CS.getInstruction()->getParent()->getParent() == Callee) {
+          CallSites.erase(CallSites.begin() + index);
+          --index;
+          continue;
+        }
+        InlineCost IC = CA.getInlineCost(CS, NeverInline);
+        if (IC.isAlways()) {
+          DEBUG(dbgs() << "  Inlining: cost=always"
+                       << ", call: " << *CS.getInstruction());
+        } else if (IC.isNever()) {
+          DEBUG(dbgs() << "  NOT Inlining: cost=never"
+                       << ", call: " << *CS.getInstruction());
+          continue;
+        } else {
+          int Cost = IC.getValue();
+
+          if (Cost >= (int) BasicInlineThreshold) {
+            DEBUG(dbgs() << "  NOT Inlining: cost = " << Cost
+                         << ", call: " << *CS.getInstruction());
+            continue;
+          } else {
+            DEBUG(dbgs() << "  Inlining: cost = " << Cost
+                         << ", call: " << *CS.getInstruction());
+          }
+        }
+
+        // Inline
+        InlineFunctionInfo IFI(0, TD);
+        if (InlineFunction(CS, IFI)) {
+          if (Callee->use_empty() && (Callee->hasLocalLinkage() ||
+                                      Callee->hasAvailableExternallyLinkage()))
+            DeadFunctions.insert(Callee);
+          Changed = true;
+          CallSites.erase(CallSites.begin() + index);
+          --index;
+        }
+      }
+    }
+  } while (Changed);
+
+  // Remove completely inlined functions from module.
+  for (SmallPtrSet<Function *, 8>::iterator I = DeadFunctions.begin(),
+         E = DeadFunctions.end(); I != E; ++I) {
+    Function *D = *I;
+    Module *M = D->getParent();
+    M->getFunctionList().remove(D);
+  }
+}
+
+BasicInliner::BasicInliner(TargetData *TD) {
+  Impl = new BasicInlinerImpl(TD);
+}
+
+BasicInliner::~BasicInliner() {
+  delete Impl;
+}
+
+/// addFunction - Add function into the list of functions to process.
+/// All functions must be inserted using this interface before invoking
+/// inlineFunctions().
+void BasicInliner::addFunction(Function *F) {
+  Impl->addFunction(F);
+}
+
+/// neverInlineFunction - Sometimes a function is never to be inlined
+/// for one reason or another.
+void BasicInliner::neverInlineFunction(Function *F) {
+  Impl->neverInlineFunction(F);
+}
+
+/// inlineFunctions - Walk all call sites in all functions supplied by
+/// client. Inline as many call sites as possible. Delete completely
+/// inlined functions.
+void BasicInliner::inlineFunctions() {
+  Impl->inlineFunctions();
+}
+
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp new file mode 100644 index 0000000..c052910 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -0,0 +1,398 @@
+//===- BreakCriticalEdges.cpp - Critical Edge Elimination Pass ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// BreakCriticalEdges pass - Break all of the critical edges in the CFG by
+// inserting a dummy basic block. This pass may be "required" by passes that
+// cannot deal with critical edges. For this usage, the structure type is
+// forward declared. This pass obviously invalidates the CFG, but can update
+// dominator trees.
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "break-crit-edges" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ProfileInfo.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/Type.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumBroken, "Number of blocks inserted"); + +namespace { + struct BreakCriticalEdges : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + BreakCriticalEdges() : FunctionPass(ID) { + initializeBreakCriticalEdgesPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved<DominatorTree>(); + AU.addPreserved<LoopInfo>(); + AU.addPreserved<ProfileInfo>(); + + // No loop canonicalization guarantees are broken by this pass. + AU.addPreservedID(LoopSimplifyID); + } + }; +} + +char BreakCriticalEdges::ID = 0; +INITIALIZE_PASS(BreakCriticalEdges, "break-crit-edges", + "Break critical edges in CFG", false, false) + +// Publicly exposed interface to pass... +char &llvm::BreakCriticalEdgesID = BreakCriticalEdges::ID; +FunctionPass *llvm::createBreakCriticalEdgesPass() { + return new BreakCriticalEdges(); +} + +// runOnFunction - Loop over all of the edges in the CFG, breaking critical +// edges as they are found. +// +bool BreakCriticalEdges::runOnFunction(Function &F) { + bool Changed = false; + for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { + TerminatorInst *TI = I->getTerminator(); + if (TI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(TI)) + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) + if (SplitCriticalEdge(TI, i, this)) { + ++NumBroken; + Changed = true; + } + } + + return Changed; +} + +//===----------------------------------------------------------------------===// +// Implementation of the external critical edge manipulation functions +//===----------------------------------------------------------------------===// + +// isCriticalEdge - Return true if the specified edge is a critical edge. +// Critical edges are edges from a block with multiple successors to a block +// with multiple predecessors. +// +bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum, + bool AllowIdenticalEdges) { + assert(SuccNum < TI->getNumSuccessors() && "Illegal edge specification!"); + if (TI->getNumSuccessors() == 1) return false; + + const BasicBlock *Dest = TI->getSuccessor(SuccNum); + const_pred_iterator I = pred_begin(Dest), E = pred_end(Dest); + + // If there is more than one predecessor, this is a critical edge... + assert(I != E && "No preds, but we have an edge to the block?"); + const BasicBlock *FirstPred = *I; + ++I; // Skip one edge due to the incoming arc from TI. + if (!AllowIdenticalEdges) + return I != E; + + // If AllowIdenticalEdges is true, then we allow this edge to be considered + // non-critical iff all preds come from TI's block. + while (I != E) { + const BasicBlock *P = *I; + if (P != FirstPred) + return true; + // Note: leave this as is until no one ever compiles with either gcc 4.0.1 + // or Xcode 2. 
This seems to work around the pred_iterator assert in PR 2207 + E = pred_end(P); + ++I; + } + return false; +} + +/// CreatePHIsForSplitLoopExit - When a loop exit edge is split, LCSSA form +/// may require new PHIs in the new exit block. This function inserts the +/// new PHIs, as needed. Preds is a list of preds inside the loop, SplitBB +/// is the new loop exit block, and DestBB is the old loop exit, now the +/// successor of SplitBB. +static void CreatePHIsForSplitLoopExit(SmallVectorImpl<BasicBlock *> &Preds, + BasicBlock *SplitBB, + BasicBlock *DestBB) { + // SplitBB shouldn't have anything non-trivial in it yet. + assert(SplitBB->getFirstNonPHI() == SplitBB->getTerminator() && + "SplitBB has non-PHI nodes!"); + + // For each PHI in the destination block... + for (BasicBlock::iterator I = DestBB->begin(); + PHINode *PN = dyn_cast<PHINode>(I); ++I) { + unsigned Idx = PN->getBasicBlockIndex(SplitBB); + Value *V = PN->getIncomingValue(Idx); + // If the input is a PHI which already satisfies LCSSA, don't create + // a new one. + if (const PHINode *VP = dyn_cast<PHINode>(V)) + if (VP->getParent() == SplitBB) + continue; + // Otherwise a new PHI is needed. Create one and populate it. + PHINode *NewPN = PHINode::Create(PN->getType(), Preds.size(), "split", + SplitBB->getTerminator()); + for (unsigned i = 0, e = Preds.size(); i != e; ++i) + NewPN->addIncoming(V, Preds[i]); + // Update the original PHI. + PN->setIncomingValue(Idx, NewPN); + } +} + +/// SplitCriticalEdge - If this edge is a critical edge, insert a new node to +/// split the critical edge. This will update DominatorTree information if it +/// is available, thus calling this pass will not invalidate either of them. +/// This returns the new block if the edge was split, null otherwise. +/// +/// If MergeIdenticalEdges is true (not the default), *all* edges from TI to the +/// specified successor will be merged into the same critical edge block. +/// This is most commonly interesting with switch instructions, which may +/// have many edges to any one destination. This ensures that all edges to that +/// dest go to one block instead of each going to a different block, but isn't +/// the standard definition of a "critical edge". +/// +/// It is invalid to call this function on a critical edge that starts at an +/// IndirectBrInst. Splitting these edges will almost always create an invalid +/// program because the address of the new block won't be the one that is jumped +/// to. +/// +BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, + Pass *P, bool MergeIdenticalEdges, + bool DontDeleteUselessPhis) { + if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return 0; + + assert(!isa<IndirectBrInst>(TI) && + "Cannot split critical edge from IndirectBrInst"); + + BasicBlock *TIBB = TI->getParent(); + BasicBlock *DestBB = TI->getSuccessor(SuccNum); + + // Splitting the critical edge to a landing pad block is non-trivial. Don't do + // it in this generic function. + if (DestBB->isLandingPad()) return 0; + + // Create a new basic block, linking it into the CFG. + BasicBlock *NewBB = BasicBlock::Create(TI->getContext(), + TIBB->getName() + "." + DestBB->getName() + "_crit_edge"); + // Create our unconditional branch. + BranchInst *NewBI = BranchInst::Create(DestBB, NewBB); + NewBI->setDebugLoc(TI->getDebugLoc()); + + // Branch to the new block, breaking the edge. + TI->setSuccessor(SuccNum, NewBB); + + // Insert the block into the function... right after the block TI lives in. 
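+ // (Block order in the function's block list has no semantic effect; this
+ // placement just keeps the printed IR readable.)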
+ Function &F = *TIBB->getParent(); + Function::iterator FBBI = TIBB; + F.getBasicBlockList().insert(++FBBI, NewBB); + + // If there are any PHI nodes in DestBB, we need to update them so that they + // merge incoming values from NewBB instead of from TIBB. + { + unsigned BBIdx = 0; + for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) { + // We no longer enter through TIBB, now we come in through NewBB. + // Revector exactly one entry in the PHI node that used to come from + // TIBB to come from NewBB. + PHINode *PN = cast<PHINode>(I); + + // Reuse the previous value of BBIdx if it lines up. In cases where we + // have multiple phi nodes with *lots* of predecessors, this is a speed + // win because we don't have to scan the PHI looking for TIBB. This + // happens because the BB list of PHI nodes are usually in the same + // order. + if (PN->getIncomingBlock(BBIdx) != TIBB) + BBIdx = PN->getBasicBlockIndex(TIBB); + PN->setIncomingBlock(BBIdx, NewBB); + } + } + + // If there are any other edges from TIBB to DestBB, update those to go + // through the split block, making those edges non-critical as well (and + // reducing the number of phi entries in the DestBB if relevant). + if (MergeIdenticalEdges) { + for (unsigned i = SuccNum+1, e = TI->getNumSuccessors(); i != e; ++i) { + if (TI->getSuccessor(i) != DestBB) continue; + + // Remove an entry for TIBB from DestBB phi nodes. + DestBB->removePredecessor(TIBB, DontDeleteUselessPhis); + + // We found another edge to DestBB, go to NewBB instead. + TI->setSuccessor(i, NewBB); + } + } + + + + // If we don't have a pass object, we can't update anything... + if (P == 0) return NewBB; + + DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>(); + LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>(); + ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>(); + + // If we have nothing to update, just return. + if (DT == 0 && LI == 0 && PI == 0) + return NewBB; + + // Now update analysis information. Since the only predecessor of NewBB is + // the TIBB, TIBB clearly dominates NewBB. TIBB usually doesn't dominate + // anything, as there are other successors of DestBB. However, if all other + // predecessors of DestBB are already dominated by DestBB (e.g. DestBB is a + // loop header) then NewBB dominates DestBB. + SmallVector<BasicBlock*, 8> OtherPreds; + + // If there is a PHI in the block, loop over predecessors with it, which is + // faster than iterating pred_begin/end. + if (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) { + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingBlock(i) != NewBB) + OtherPreds.push_back(PN->getIncomingBlock(i)); + } else { + for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB); + I != E; ++I) { + BasicBlock *P = *I; + if (P != NewBB) + OtherPreds.push_back(P); + } + } + + bool NewBBDominatesDestBB = true; + + // Should we update DominatorTree information? + if (DT) { + DomTreeNode *TINode = DT->getNode(TIBB); + + // The new block is not the immediate dominator for any other nodes, but + // TINode is the immediate dominator for the new node. + // + if (TINode) { // Don't break unreachable code! + DomTreeNode *NewBBNode = DT->addNewBlock(NewBB, TIBB); + DomTreeNode *DestBBNode = 0; + + // If NewBBDominatesDestBB hasn't been computed yet, do so with DT. 
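+ // NewBB dominates DestBB only if every other predecessor of DestBB is
+ // itself dominated by DestBB (the loop-header case described above).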
+ if (!OtherPreds.empty()) { + DestBBNode = DT->getNode(DestBB); + while (!OtherPreds.empty() && NewBBDominatesDestBB) { + if (DomTreeNode *OPNode = DT->getNode(OtherPreds.back())) + NewBBDominatesDestBB = DT->dominates(DestBBNode, OPNode); + OtherPreds.pop_back(); + } + OtherPreds.clear(); + } + + // If NewBBDominatesDestBB, then NewBB dominates DestBB, otherwise it + // doesn't dominate anything. + if (NewBBDominatesDestBB) { + if (!DestBBNode) DestBBNode = DT->getNode(DestBB); + DT->changeImmediateDominator(DestBBNode, NewBBNode); + } + } + } + + // Update LoopInfo if it is around. + if (LI) { + if (Loop *TIL = LI->getLoopFor(TIBB)) { + // If one or the other blocks were not in a loop, the new block is not + // either, and thus LI doesn't need to be updated. + if (Loop *DestLoop = LI->getLoopFor(DestBB)) { + if (TIL == DestLoop) { + // Both in the same loop, the NewBB joins loop. + DestLoop->addBasicBlockToLoop(NewBB, LI->getBase()); + } else if (TIL->contains(DestLoop)) { + // Edge from an outer loop to an inner loop. Add to the outer loop. + TIL->addBasicBlockToLoop(NewBB, LI->getBase()); + } else if (DestLoop->contains(TIL)) { + // Edge from an inner loop to an outer loop. Add to the outer loop. + DestLoop->addBasicBlockToLoop(NewBB, LI->getBase()); + } else { + // Edge from two loops with no containment relation. Because these + // are natural loops, we know that the destination block must be the + // header of its loop (adding a branch into a loop elsewhere would + // create an irreducible loop). + assert(DestLoop->getHeader() == DestBB && + "Should not create irreducible loops!"); + if (Loop *P = DestLoop->getParentLoop()) + P->addBasicBlockToLoop(NewBB, LI->getBase()); + } + } + // If TIBB is in a loop and DestBB is outside of that loop, split the + // other exit blocks of the loop that also have predecessors outside + // the loop, to maintain a LoopSimplify guarantee. + if (!TIL->contains(DestBB) && + P->mustPreserveAnalysisID(LoopSimplifyID)) { + assert(!TIL->contains(NewBB) && + "Split point for loop exit is contained in loop!"); + + // Update LCSSA form in the newly created exit block. + if (P->mustPreserveAnalysisID(LCSSAID)) { + SmallVector<BasicBlock *, 1> OrigPred; + OrigPred.push_back(TIBB); + CreatePHIsForSplitLoopExit(OrigPred, NewBB, DestBB); + } + + // For each unique exit block... + // FIXME: This code is functionally equivalent to the corresponding + // loop in LoopSimplify. + SmallVector<BasicBlock *, 4> ExitBlocks; + TIL->getExitBlocks(ExitBlocks); + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { + // Collect all the preds that are inside the loop, and note + // whether there are any preds outside the loop. + SmallVector<BasicBlock *, 4> Preds; + bool HasPredOutsideOfLoop = false; + BasicBlock *Exit = ExitBlocks[i]; + for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); + I != E; ++I) { + BasicBlock *P = *I; + if (TIL->contains(P)) { + if (isa<IndirectBrInst>(P->getTerminator())) { + Preds.clear(); + break; + } + Preds.push_back(P); + } else { + HasPredOutsideOfLoop = true; + } + } + // If there are any preds not in the loop, we'll need to split + // the edges. The Preds.empty() check is needed because a block + // may appear multiple times in the list. We can't use + // getUniqueExitBlocks above because that depends on LoopSimplify + // form, which we're in the process of restoring! 
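+ // Only exits with both in-loop and out-of-loop predecessors need to be
+ // split; exits reached solely from inside the loop already satisfy the
+ // LoopSimplify guarantee.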
+ if (!Preds.empty() && HasPredOutsideOfLoop) {
+ BasicBlock *NewExitBB =
+ SplitBlockPredecessors(Exit, Preds.data(), Preds.size(),
+ "split", P);
+ if (P->mustPreserveAnalysisID(LCSSAID))
+ CreatePHIsForSplitLoopExit(Preds, NewExitBB, Exit);
+ }
+ }
+ }
+ // LCSSA form was updated above for the case where LoopSimplify is
+ // available, which means that all predecessors of loop exit blocks
+ // are within the loop. Without LoopSimplify form, it would be
+ // necessary to insert a new phi.
+ assert((!P->mustPreserveAnalysisID(LCSSAID) ||
+ P->mustPreserveAnalysisID(LoopSimplifyID)) &&
+ "SplitCriticalEdge doesn't know how to update LCSSA form "
+ "without LoopSimplify!");
+ }
+ }
+
+ // Update ProfileInfo if it is around.
+ if (PI)
+ PI->splitEdge(TIBB, DestBB, NewBB, MergeIdenticalEdges);
+
+ return NewBB;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
new file mode 100644
index 0000000..4b5f45b
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -0,0 +1,479 @@
+//===- BuildLibCalls.cpp - Utility builder for libcalls -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements some functions that will create standard C libcalls.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/Type.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Intrinsics.h"
+
+using namespace llvm;
+
+/// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*.
+Value *llvm::CastToCStr(Value *V, IRBuilder<> &B) {
+ return B.CreateBitCast(V, B.getInt8PtrTy(), "cstr");
+}
+
+/// EmitStrLen - Emit a call to the strlen function to the builder, for the
+/// specified pointer. This always returns an integer value of size intptr_t.
+Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI[2];
+ AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(~0u, Attribute::ReadOnly |
+ Attribute::NoUnwind);
+
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ Constant *StrLen = M->getOrInsertFunction("strlen", AttrListPtr::get(AWI, 2),
+ TD->getIntPtrType(Context),
+ B.getInt8PtrTy(),
+ NULL);
+ CallInst *CI = B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen");
+ if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
+
+/// EmitStrChr - Emit a call to the strchr function to the builder, for the
+/// specified pointer and character. Ptr is required to be some pointer type,
+/// and the return value has 'i8*' type.
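+///
+/// A minimal usage sketch (hypothetical caller; 'InsertPt', 'Path', and 'TD'
+/// are illustrative names, not part of this API):
+///   IRBuilder<> B(InsertPt);
+///   Value *Slash = EmitStrChr(Path, '/', B, TD);  // yields an i8* result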
+Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B, + const TargetData *TD) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeWithIndex AWI = + AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind); + + Type *I8Ptr = B.getInt8PtrTy(); + Type *I32Ty = B.getInt32Ty(); + Constant *StrChr = M->getOrInsertFunction("strchr", AttrListPtr::get(&AWI, 1), + I8Ptr, I8Ptr, I32Ty, NULL); + CallInst *CI = B.CreateCall2(StrChr, CastToCStr(Ptr, B), + ConstantInt::get(I32Ty, C), "strchr"); + if (const Function *F = dyn_cast<Function>(StrChr->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + return CI; +} + +/// EmitStrNCmp - Emit a call to the strncmp function to the builder. +Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, + IRBuilder<> &B, const TargetData *TD) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeWithIndex AWI[3]; + AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture); + AWI[2] = AttributeWithIndex::get(~0u, Attribute::ReadOnly | + Attribute::NoUnwind); + + LLVMContext &Context = B.GetInsertBlock()->getContext(); + Value *StrNCmp = M->getOrInsertFunction("strncmp", AttrListPtr::get(AWI, 3), + B.getInt32Ty(), + B.getInt8PtrTy(), + B.getInt8PtrTy(), + TD->getIntPtrType(Context), NULL); + CallInst *CI = B.CreateCall3(StrNCmp, CastToCStr(Ptr1, B), + CastToCStr(Ptr2, B), Len, "strncmp"); + + if (const Function *F = dyn_cast<Function>(StrNCmp->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; +} + +/// EmitStrCpy - Emit a call to the strcpy function to the builder, for the +/// specified pointer arguments. +Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B, + const TargetData *TD, StringRef Name) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeWithIndex AWI[2]; + AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); + Type *I8Ptr = B.getInt8PtrTy(); + Value *StrCpy = M->getOrInsertFunction(Name, AttrListPtr::get(AWI, 2), + I8Ptr, I8Ptr, I8Ptr, NULL); + CallInst *CI = B.CreateCall2(StrCpy, CastToCStr(Dst, B), CastToCStr(Src, B), + Name); + if (const Function *F = dyn_cast<Function>(StrCpy->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + return CI; +} + +/// EmitStrNCpy - Emit a call to the strncpy function to the builder, for the +/// specified pointer arguments. +Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, + IRBuilder<> &B, const TargetData *TD, StringRef Name) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeWithIndex AWI[2]; + AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); + Type *I8Ptr = B.getInt8PtrTy(); + Value *StrNCpy = M->getOrInsertFunction(Name, AttrListPtr::get(AWI, 2), + I8Ptr, I8Ptr, I8Ptr, + Len->getType(), NULL); + CallInst *CI = B.CreateCall3(StrNCpy, CastToCStr(Dst, B), CastToCStr(Src, B), + Len, "strncpy"); + if (const Function *F = dyn_cast<Function>(StrNCpy->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + return CI; +} + +/// EmitMemCpyChk - Emit a call to the __memcpy_chk function to the builder. +/// This expects that the Len and ObjSize have type 'intptr_t' and Dst/Src +/// are pointers. 
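+///
+/// For example (hypothetical caller; Dst, Src, Len, and ObjSize are
+/// illustrative values prepared elsewhere):
+///   Value *Ret = EmitMemCpyChk(Dst, Src, Len, ObjSize, B, TD);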
+Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
+ IRBuilder<> &B, const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI;
+ AWI = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ Value *MemCpy = M->getOrInsertFunction("__memcpy_chk",
+ AttrListPtr::get(&AWI, 1),
+ B.getInt8PtrTy(),
+ B.getInt8PtrTy(),
+ B.getInt8PtrTy(),
+ TD->getIntPtrType(Context),
+ TD->getIntPtrType(Context), NULL);
+ Dst = CastToCStr(Dst, B);
+ Src = CastToCStr(Src, B);
+ CallInst *CI = B.CreateCall4(MemCpy, Dst, Src, Len, ObjSize);
+ if (const Function *F = dyn_cast<Function>(MemCpy->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+ return CI;
+}
+
+/// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is
+/// a pointer, Val is an i32 value, and Len is an 'intptr_t' value.
+Value *llvm::EmitMemChr(Value *Ptr, Value *Val,
+ Value *Len, IRBuilder<> &B, const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI;
+ AWI = AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind);
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ Value *MemChr = M->getOrInsertFunction("memchr", AttrListPtr::get(&AWI, 1),
+ B.getInt8PtrTy(),
+ B.getInt8PtrTy(),
+ B.getInt32Ty(),
+ TD->getIntPtrType(Context),
+ NULL);
+ CallInst *CI = B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr");
+
+ if (const Function *F = dyn_cast<Function>(MemChr->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
+
+/// EmitMemCmp - Emit a call to the memcmp function.
+Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
+ Value *Len, IRBuilder<> &B, const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI[3];
+ AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture);
+ AWI[2] = AttributeWithIndex::get(~0u, Attribute::ReadOnly |
+ Attribute::NoUnwind);
+
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ Value *MemCmp = M->getOrInsertFunction("memcmp", AttrListPtr::get(AWI, 3),
+ B.getInt32Ty(),
+ B.getInt8PtrTy(),
+ B.getInt8PtrTy(),
+ TD->getIntPtrType(Context), NULL);
+ CallInst *CI = B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B),
+ Len, "memcmp");
+
+ if (const Function *F = dyn_cast<Function>(MemCmp->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
+
+/// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name' (e.g.
+/// 'floor'). This function is known to take a single argument of type matching
+/// 'Op' and to return one value of the same type. If 'Op' is a long double,
+/// an 'l' suffix is added to the name; if 'Op' is a float, an 'f' suffix is
+/// added.
+Value *llvm::EmitUnaryFloatFnCall(Value *Op, const char *Name,
+ IRBuilder<> &B, const AttrListPtr &Attrs) {
+ char NameBuffer[20];
+ if (!Op->getType()->isDoubleTy()) {
+ // If we need to add a suffix, copy into NameBuffer.
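+ // e.g. "floor" becomes "floorf" for a float operand and "floorl" for a
+ // long double operand.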
+ unsigned NameLen = strlen(Name);
+ assert(NameLen < sizeof(NameBuffer)-2);
+ memcpy(NameBuffer, Name, NameLen);
+ if (Op->getType()->isFloatTy())
+ NameBuffer[NameLen] = 'f'; // floorf
+ else
+ NameBuffer[NameLen] = 'l'; // floorl
+ NameBuffer[NameLen+1] = 0;
+ Name = NameBuffer;
+ }
+
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
+ Op->getType(), NULL);
+ CallInst *CI = B.CreateCall(Callee, Op, Name);
+ CI->setAttributes(Attrs);
+ if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
+
+/// EmitPutChar - Emit a call to the putchar function. This assumes that Char
+/// is an integer.
+Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Value *PutChar = M->getOrInsertFunction("putchar", B.getInt32Ty(),
+ B.getInt32Ty(), NULL);
+ CallInst *CI = B.CreateCall(PutChar,
+ B.CreateIntCast(Char,
+ B.getInt32Ty(),
+ /*isSigned*/true,
+ "chari"),
+ "putchar");
+
+ if (const Function *F = dyn_cast<Function>(PutChar->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+ return CI;
+}
+
+/// EmitPutS - Emit a call to the puts function. This assumes that Str is
+/// some pointer.
+void llvm::EmitPutS(Value *Str, IRBuilder<> &B, const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI[2];
+ AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
+
+ Value *PutS = M->getOrInsertFunction("puts", AttrListPtr::get(AWI, 2),
+ B.getInt32Ty(),
+ B.getInt8PtrTy(),
+ NULL);
+ CallInst *CI = B.CreateCall(PutS, CastToCStr(Str, B), "puts");
+ if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+}
+
+/// EmitFPutC - Emit a call to the fputc function. This assumes that Char is
+/// an integer and File is a pointer to FILE.
+void llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
+ const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI[2];
+ AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
+ Constant *F;
+ if (File->getType()->isPointerTy())
+ F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI, 2),
+ B.getInt32Ty(),
+ B.getInt32Ty(), File->getType(),
+ NULL);
+ else
+ F = M->getOrInsertFunction("fputc",
+ B.getInt32Ty(),
+ B.getInt32Ty(),
+ File->getType(), NULL);
+ Char = B.CreateIntCast(Char, B.getInt32Ty(), /*isSigned*/true,
+ "chari");
+ CallInst *CI = B.CreateCall2(F, Char, File, "fputc");
+
+ if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ CI->setCallingConv(Fn->getCallingConv());
+}
+
+/// EmitFPutS - Emit a call to the fputs function. Str is required to be a
+/// pointer and File is a pointer to FILE.
+void llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, + const TargetData *TD) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeWithIndex AWI[3]; + AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture); + AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); + Constant *F; + if (File->getType()->isPointerTy()) + F = M->getOrInsertFunction("fputs", AttrListPtr::get(AWI, 3), + B.getInt32Ty(), + B.getInt8PtrTy(), + File->getType(), NULL); + else + F = M->getOrInsertFunction("fputs", B.getInt32Ty(), + B.getInt8PtrTy(), + File->getType(), NULL); + CallInst *CI = B.CreateCall2(F, CastToCStr(Str, B), File, "fputs"); + + if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) + CI->setCallingConv(Fn->getCallingConv()); +} + +/// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is +/// a pointer, Size is an 'intptr_t', and File is a pointer to FILE. +void llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, + IRBuilder<> &B, const TargetData *TD) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeWithIndex AWI[3]; + AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(4, Attribute::NoCapture); + AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); + LLVMContext &Context = B.GetInsertBlock()->getContext(); + Constant *F; + if (File->getType()->isPointerTy()) + F = M->getOrInsertFunction("fwrite", AttrListPtr::get(AWI, 3), + TD->getIntPtrType(Context), + B.getInt8PtrTy(), + TD->getIntPtrType(Context), + TD->getIntPtrType(Context), + File->getType(), NULL); + else + F = M->getOrInsertFunction("fwrite", TD->getIntPtrType(Context), + B.getInt8PtrTy(), + TD->getIntPtrType(Context), + TD->getIntPtrType(Context), + File->getType(), NULL); + CallInst *CI = B.CreateCall4(F, CastToCStr(Ptr, B), Size, + ConstantInt::get(TD->getIntPtrType(Context), 1), File); + + if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) + CI->setCallingConv(Fn->getCallingConv()); +} + +SimplifyFortifiedLibCalls::~SimplifyFortifiedLibCalls() { } + +bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { + // We really need TargetData for later. + if (!TD) return false; + + this->CI = CI; + Function *Callee = CI->getCalledFunction(); + StringRef Name = Callee->getName(); + FunctionType *FT = Callee->getFunctionType(); + LLVMContext &Context = CI->getParent()->getContext(); + IRBuilder<> B(CI); + + if (Name == "__memcpy_chk") { + // Check if this has the right signature. + if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + FT->getParamType(2) != TD->getIntPtrType(Context) || + FT->getParamType(3) != TD->getIntPtrType(Context)) + return false; + + if (isFoldable(3, 2, false)) { + B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), 1); + replaceCall(CI->getArgOperand(0)); + return true; + } + return false; + } + + // Should be similar to memcpy. + if (Name == "__mempcpy_chk") { + return false; + } + + if (Name == "__memmove_chk") { + // Check if this has the right signature. 
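+ // Expected shape: i8* __memmove_chk(i8* dst, i8* src, intptr_t len,
+ // intptr_t objsize), mirroring the __memcpy_chk check above.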
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + FT->getParamType(2) != TD->getIntPtrType(Context) || + FT->getParamType(3) != TD->getIntPtrType(Context)) + return false; + + if (isFoldable(3, 2, false)) { + B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), 1); + replaceCall(CI->getArgOperand(0)); + return true; + } + return false; + } + + if (Name == "__memset_chk") { + // Check if this has the right signature. + if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isIntegerTy() || + FT->getParamType(2) != TD->getIntPtrType(Context) || + FT->getParamType(3) != TD->getIntPtrType(Context)) + return false; + + if (isFoldable(3, 2, false)) { + Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), + false); + B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1); + replaceCall(CI->getArgOperand(0)); + return true; + } + return false; + } + + if (Name == "__strcpy_chk" || Name == "__stpcpy_chk") { + // Check if this has the right signature. + if (FT->getNumParams() != 3 || + FT->getReturnType() != FT->getParamType(0) || + FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != Type::getInt8PtrTy(Context) || + FT->getParamType(2) != TD->getIntPtrType(Context)) + return 0; + + + // If a) we don't have any length information, or b) we know this will + // fit then just lower to a plain st[rp]cpy. Otherwise we'll keep our + // st[rp]cpy_chk call which may fail at runtime if the size is too long. + // TODO: It might be nice to get a maximum length out of the possible + // string lengths for varying. + if (isFoldable(2, 1, true)) { + Value *Ret = EmitStrCpy(CI->getArgOperand(0), CI->getArgOperand(1), B, TD, + Name.substr(2, 6)); + replaceCall(Ret); + return true; + } + return false; + } + + if (Name == "__strncpy_chk" || Name == "__stpncpy_chk") { + // Check if this has the right signature. + if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || + FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != Type::getInt8PtrTy(Context) || + !FT->getParamType(2)->isIntegerTy() || + FT->getParamType(3) != TD->getIntPtrType(Context)) + return false; + + if (isFoldable(3, 2, false)) { + Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), B, TD, Name.substr(2, 7)); + replaceCall(Ret); + return true; + } + return false; + } + + if (Name == "__strcat_chk") { + return false; + } + + if (Name == "__strncat_chk") { + return false; + } + + return false; +} diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp new file mode 100644 index 0000000..cf21f1e --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -0,0 +1,533 @@ +//===- CloneFunction.cpp - Clone a function into another function ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the CloneFunctionInto interface, which is used as the +// low-level function cloner. This is used by the CloneFunction and function +// inliner to do the dirty work of copying the body of a function around. 
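+// Cloning is a two-pass process: blocks and instructions are copied first,
+// then operand references are rewritten through the value map.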
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Function.h" +#include "llvm/LLVMContext.h" +#include "llvm/Metadata.h" +#include "llvm/Support/CFG.h" +#include "llvm/Transforms/Utils/ValueMapper.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/ADT/SmallVector.h" +#include <map> +using namespace llvm; + +// CloneBasicBlock - See comments in Cloning.h +BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, + ValueToValueMapTy &VMap, + const Twine &NameSuffix, Function *F, + ClonedCodeInfo *CodeInfo) { + BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F); + if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix); + + bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false; + + // Loop over all instructions, and copy them over. + for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); + II != IE; ++II) { + Instruction *NewInst = II->clone(); + if (II->hasName()) + NewInst->setName(II->getName()+NameSuffix); + NewBB->getInstList().push_back(NewInst); + VMap[II] = NewInst; // Add instruction map to value. + + hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II)); + if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) { + if (isa<ConstantInt>(AI->getArraySize())) + hasStaticAllocas = true; + else + hasDynamicAllocas = true; + } + } + + if (CodeInfo) { + CodeInfo->ContainsCalls |= hasCalls; + CodeInfo->ContainsUnwinds |= isa<UnwindInst>(BB->getTerminator()); + CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas; + CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas && + BB != &BB->getParent()->getEntryBlock(); + } + return NewBB; +} + +// Clone OldFunc into NewFunc, transforming the old arguments into references to +// VMap values. +// +void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, + ValueToValueMapTy &VMap, + bool ModuleLevelChanges, + SmallVectorImpl<ReturnInst*> &Returns, + const char *NameSuffix, ClonedCodeInfo *CodeInfo) { + assert(NameSuffix && "NameSuffix cannot be null!"); + +#ifndef NDEBUG + for (Function::const_arg_iterator I = OldFunc->arg_begin(), + E = OldFunc->arg_end(); I != E; ++I) + assert(VMap.count(I) && "No mapping from source argument specified!"); +#endif + + // Clone any attributes. + if (NewFunc->arg_size() == OldFunc->arg_size()) + NewFunc->copyAttributesFrom(OldFunc); + else { + //Some arguments were deleted with the VMap. Copy arguments one by one + for (Function::const_arg_iterator I = OldFunc->arg_begin(), + E = OldFunc->arg_end(); I != E; ++I) + if (Argument* Anew = dyn_cast<Argument>(VMap[I])) + Anew->addAttr( OldFunc->getAttributes() + .getParamAttributes(I->getArgNo() + 1)); + NewFunc->setAttributes(NewFunc->getAttributes() + .addAttr(0, OldFunc->getAttributes() + .getRetAttributes())); + NewFunc->setAttributes(NewFunc->getAttributes() + .addAttr(~0, OldFunc->getAttributes() + .getFnAttributes())); + + } + + // Loop over all of the basic blocks in the function, cloning them as + // appropriate. Note that we save BE this way in order to handle cloning of + // recursive functions into themselves. 
+ // + for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end(); + BI != BE; ++BI) { + const BasicBlock &BB = *BI; + + // Create a new basic block and copy instructions into it! + BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo); + VMap[&BB] = CBB; // Add basic block mapping. + + if (ReturnInst *RI = dyn_cast<ReturnInst>(CBB->getTerminator())) + Returns.push_back(RI); + } + + // Loop over all of the instructions in the function, fixing up operand + // references as we go. This uses VMap to do all the hard work. + for (Function::iterator BB = cast<BasicBlock>(VMap[OldFunc->begin()]), + BE = NewFunc->end(); BB != BE; ++BB) + // Loop over all instructions, fixing each one as we find it... + for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II) + RemapInstruction(II, VMap, + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges); +} + +/// CloneFunction - Return a copy of the specified function, but without +/// embedding the function into another module. Also, any references specified +/// in the VMap are changed to refer to their mapped value instead of the +/// original one. If any of the arguments to the function are in the VMap, +/// the arguments are deleted from the resultant function. The VMap is +/// updated to include mappings from all of the instructions and basicblocks in +/// the function from their old to new values. +/// +Function *llvm::CloneFunction(const Function *F, ValueToValueMapTy &VMap, + bool ModuleLevelChanges, + ClonedCodeInfo *CodeInfo) { + std::vector<Type*> ArgTypes; + + // The user might be deleting arguments to the function by specifying them in + // the VMap. If so, we need to not add the arguments to the arg ty vector + // + for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); + I != E; ++I) + if (VMap.count(I) == 0) // Haven't mapped the argument to anything yet? + ArgTypes.push_back(I->getType()); + + // Create a new function type... + FunctionType *FTy = FunctionType::get(F->getFunctionType()->getReturnType(), + ArgTypes, F->getFunctionType()->isVarArg()); + + // Create the new function... + Function *NewF = Function::Create(FTy, F->getLinkage(), F->getName()); + + // Loop over the arguments, copying the names of the mapped arguments over... + Function::arg_iterator DestI = NewF->arg_begin(); + for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); + I != E; ++I) + if (VMap.count(I) == 0) { // Is this argument preserved? + DestI->setName(I->getName()); // Copy the name over... + VMap[I] = DestI++; // Add mapping to VMap + } + + SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned. + CloneFunctionInto(NewF, F, VMap, ModuleLevelChanges, Returns, "", CodeInfo); + return NewF; +} + + + +namespace { + /// PruningFunctionCloner - This class is a private class used to implement + /// the CloneAndPruneFunctionInto method. 
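+ /// Unlike the plain cloner, it copies only blocks it can prove reachable,
+ /// constant-folding branches and switches through the value map as it goes.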
+ struct PruningFunctionCloner { + Function *NewFunc; + const Function *OldFunc; + ValueToValueMapTy &VMap; + bool ModuleLevelChanges; + SmallVectorImpl<ReturnInst*> &Returns; + const char *NameSuffix; + ClonedCodeInfo *CodeInfo; + const TargetData *TD; + public: + PruningFunctionCloner(Function *newFunc, const Function *oldFunc, + ValueToValueMapTy &valueMap, + bool moduleLevelChanges, + SmallVectorImpl<ReturnInst*> &returns, + const char *nameSuffix, + ClonedCodeInfo *codeInfo, + const TargetData *td) + : NewFunc(newFunc), OldFunc(oldFunc), + VMap(valueMap), ModuleLevelChanges(moduleLevelChanges), + Returns(returns), NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td) { + } + + /// CloneBlock - The specified block is found to be reachable, clone it and + /// anything that it can reach. + void CloneBlock(const BasicBlock *BB, + std::vector<const BasicBlock*> &ToClone); + + public: + /// ConstantFoldMappedInstruction - Constant fold the specified instruction, + /// mapping its operands through VMap if they are available. + Constant *ConstantFoldMappedInstruction(const Instruction *I); + }; +} + +/// CloneBlock - The specified block is found to be reachable, clone it and +/// anything that it can reach. +void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, + std::vector<const BasicBlock*> &ToClone){ + TrackingVH<Value> &BBEntry = VMap[BB]; + + // Have we already cloned this block? + if (BBEntry) return; + + // Nope, clone it now. + BasicBlock *NewBB; + BBEntry = NewBB = BasicBlock::Create(BB->getContext()); + if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix); + + bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false; + + // Loop over all instructions, and copy them over, DCE'ing as we go. This + // loop doesn't include the terminator. + for (BasicBlock::const_iterator II = BB->begin(), IE = --BB->end(); + II != IE; ++II) { + // If this instruction constant folds, don't bother cloning the instruction, + // instead, just add the constant to the value map. + if (Constant *C = ConstantFoldMappedInstruction(II)) { + VMap[II] = C; + continue; + } + + Instruction *NewInst = II->clone(); + if (II->hasName()) + NewInst->setName(II->getName()+NameSuffix); + NewBB->getInstList().push_back(NewInst); + VMap[II] = NewInst; // Add instruction map to value. + + hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II)); + if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) { + if (isa<ConstantInt>(AI->getArraySize())) + hasStaticAllocas = true; + else + hasDynamicAllocas = true; + } + } + + // Finally, clone over the terminator. + const TerminatorInst *OldTI = BB->getTerminator(); + bool TerminatorDone = false; + if (const BranchInst *BI = dyn_cast<BranchInst>(OldTI)) { + if (BI->isConditional()) { + // If the condition was a known constant in the callee... + ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition()); + // Or is a known constant in the caller... + if (Cond == 0) { + Value *V = VMap[BI->getCondition()]; + Cond = dyn_cast_or_null<ConstantInt>(V); + } + + // Constant fold to uncond branch! + if (Cond) { + BasicBlock *Dest = BI->getSuccessor(!Cond->getZExtValue()); + VMap[OldTI] = BranchInst::Create(Dest, NewBB); + ToClone.push_back(Dest); + TerminatorDone = true; + } + } + } else if (const SwitchInst *SI = dyn_cast<SwitchInst>(OldTI)) { + // If switching on a value known constant in the caller. + ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition()); + if (Cond == 0) { // Or known constant after constant prop in the callee... 
+ Value *V = VMap[SI->getCondition()]; + Cond = dyn_cast_or_null<ConstantInt>(V); + } + if (Cond) { // Constant fold to uncond branch! + BasicBlock *Dest = SI->getSuccessor(SI->findCaseValue(Cond)); + VMap[OldTI] = BranchInst::Create(Dest, NewBB); + ToClone.push_back(Dest); + TerminatorDone = true; + } + } + + if (!TerminatorDone) { + Instruction *NewInst = OldTI->clone(); + if (OldTI->hasName()) + NewInst->setName(OldTI->getName()+NameSuffix); + NewBB->getInstList().push_back(NewInst); + VMap[OldTI] = NewInst; // Add instruction map to value. + + // Recursively clone any reachable successor blocks. + const TerminatorInst *TI = BB->getTerminator(); + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) + ToClone.push_back(TI->getSuccessor(i)); + } + + if (CodeInfo) { + CodeInfo->ContainsCalls |= hasCalls; + CodeInfo->ContainsUnwinds |= isa<UnwindInst>(OldTI); + CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas; + CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas && + BB != &BB->getParent()->front(); + } + + if (ReturnInst *RI = dyn_cast<ReturnInst>(NewBB->getTerminator())) + Returns.push_back(RI); +} + +/// ConstantFoldMappedInstruction - Constant fold the specified instruction, +/// mapping its operands through VMap if they are available. +Constant *PruningFunctionCloner:: +ConstantFoldMappedInstruction(const Instruction *I) { + SmallVector<Constant*, 8> Ops; + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) + if (Constant *Op = dyn_cast_or_null<Constant>(MapValue(I->getOperand(i), + VMap, + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges))) + Ops.push_back(Op); + else + return 0; // All operands not constant! + + if (const CmpInst *CI = dyn_cast<CmpInst>(I)) + return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1], + TD); + + if (const LoadInst *LI = dyn_cast<LoadInst>(I)) + if (!LI->isVolatile()) + return ConstantFoldLoadFromConstPtr(Ops[0], TD); + + return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD); +} + +/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto, +/// except that it does some simple constant prop and DCE on the fly. The +/// effect of this is to copy significantly less code in cases where (for +/// example) a function call with constant arguments is inlined, and those +/// constant arguments cause a significant amount of code in the callee to be +/// dead. Since this doesn't produce an exact copy of the input, it can't be +/// used for things like CloneFunction or CloneModule. +void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, + ValueToValueMapTy &VMap, + bool ModuleLevelChanges, + SmallVectorImpl<ReturnInst*> &Returns, + const char *NameSuffix, + ClonedCodeInfo *CodeInfo, + const TargetData *TD, + Instruction *TheCall) { + assert(NameSuffix && "NameSuffix cannot be null!"); + +#ifndef NDEBUG + for (Function::const_arg_iterator II = OldFunc->arg_begin(), + E = OldFunc->arg_end(); II != E; ++II) + assert(VMap.count(II) && "No mapping from source argument specified!"); +#endif + + PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges, + Returns, NameSuffix, CodeInfo, TD); + + // Clone the entry block, and anything recursively reachable from it. 
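+ // An explicit worklist stands in for recursion here; CloneBlock records
+ // each block in VMap, so no block is cloned twice.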
+ std::vector<const BasicBlock*> CloneWorklist; + CloneWorklist.push_back(&OldFunc->getEntryBlock()); + while (!CloneWorklist.empty()) { + const BasicBlock *BB = CloneWorklist.back(); + CloneWorklist.pop_back(); + PFC.CloneBlock(BB, CloneWorklist); + } + + // Loop over all of the basic blocks in the old function. If the block was + // reachable, we have cloned it and the old block is now in the value map: + // insert it into the new function in the right order. If not, ignore it. + // + // Defer PHI resolution until rest of function is resolved. + SmallVector<const PHINode*, 16> PHIToResolve; + for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end(); + BI != BE; ++BI) { + Value *V = VMap[BI]; + BasicBlock *NewBB = cast_or_null<BasicBlock>(V); + if (NewBB == 0) continue; // Dead block. + + // Add the new block to the new function. + NewFunc->getBasicBlockList().push_back(NewBB); + + // Loop over all of the instructions in the block, fixing up operand + // references as we go. This uses VMap to do all the hard work. + // + BasicBlock::iterator I = NewBB->begin(); + + DebugLoc TheCallDL; + if (TheCall) + TheCallDL = TheCall->getDebugLoc(); + + // Handle PHI nodes specially, as we have to remove references to dead + // blocks. + if (PHINode *PN = dyn_cast<PHINode>(I)) { + // Skip over all PHI nodes, remembering them for later. + BasicBlock::const_iterator OldI = BI->begin(); + for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI) + PHIToResolve.push_back(cast<PHINode>(OldI)); + } + + // Otherwise, remap the rest of the instructions normally. + for (; I != NewBB->end(); ++I) + RemapInstruction(I, VMap, + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges); + } + + // Defer PHI resolution until rest of function is resolved, PHI resolution + // requires the CFG to be up-to-date. + for (unsigned phino = 0, e = PHIToResolve.size(); phino != e; ) { + const PHINode *OPN = PHIToResolve[phino]; + unsigned NumPreds = OPN->getNumIncomingValues(); + const BasicBlock *OldBB = OPN->getParent(); + BasicBlock *NewBB = cast<BasicBlock>(VMap[OldBB]); + + // Map operands for blocks that are live and remove operands for blocks + // that are dead. + for (; phino != PHIToResolve.size() && + PHIToResolve[phino]->getParent() == OldBB; ++phino) { + OPN = PHIToResolve[phino]; + PHINode *PN = cast<PHINode>(VMap[OPN]); + for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) { + Value *V = VMap[PN->getIncomingBlock(pred)]; + if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(V)) { + Value *InVal = MapValue(PN->getIncomingValue(pred), + VMap, + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges); + assert(InVal && "Unknown input value?"); + PN->setIncomingValue(pred, InVal); + PN->setIncomingBlock(pred, MappedBlock); + } else { + PN->removeIncomingValue(pred, false); + --pred, --e; // Revisit the next entry. + } + } + } + + // The loop above has removed PHI entries for those blocks that are dead + // and has updated others. However, if a block is live (i.e. copied over) + // but its terminator has been changed to not go to this block, then our + // phi nodes will have invalid entries. Update the PHI nodes in this + // case. + PHINode *PN = cast<PHINode>(NewBB->begin()); + NumPreds = std::distance(pred_begin(NewBB), pred_end(NewBB)); + if (NumPreds != PN->getNumIncomingValues()) { + assert(NumPreds < PN->getNumIncomingValues()); + // Count how many times each predecessor comes to this block. 
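+ // Each real CFG edge decrements a counter and each PHI entry increments
+ // it, so entries for edges that no longer exist come out positive.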
+ std::map<BasicBlock*, unsigned> PredCount;
+ for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB);
+ PI != E; ++PI)
+ --PredCount[*PI];
+
+ // Figure out how many entries to remove from each PHI.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ ++PredCount[PN->getIncomingBlock(i)];
+
+ // At this point, the excess predecessor entries are positive in the
+ // map. Loop over all of the PHIs and remove excess predecessor
+ // entries.
+ BasicBlock::iterator I = NewBB->begin();
+ for (; (PN = dyn_cast<PHINode>(I)); ++I) {
+ for (std::map<BasicBlock*, unsigned>::iterator PCI = PredCount.begin(),
+ E = PredCount.end(); PCI != E; ++PCI) {
+ BasicBlock *Pred = PCI->first;
+ for (unsigned NumToRemove = PCI->second; NumToRemove; --NumToRemove)
+ PN->removeIncomingValue(Pred, false);
+ }
+ }
+ }
+
+ // If the loops above have made these phi nodes have 0 or 1 operand,
+ // replace them with undef or the input value. We must do this for
+ // correctness, because 0-operand phis are not valid.
+ PN = cast<PHINode>(NewBB->begin());
+ if (PN->getNumIncomingValues() == 0) {
+ BasicBlock::iterator I = NewBB->begin();
+ BasicBlock::const_iterator OldI = OldBB->begin();
+ while ((PN = dyn_cast<PHINode>(I++))) {
+ Value *NV = UndefValue::get(PN->getType());
+ PN->replaceAllUsesWith(NV);
+ assert(VMap[OldI] == PN && "VMap mismatch");
+ VMap[OldI] = NV;
+ PN->eraseFromParent();
+ ++OldI;
+ }
+ }
+ // NOTE: We cannot eliminate single entry phi nodes here, because of
+ // VMap. Single entry phi nodes can have multiple VMap entries
+ // pointing at them. Thus, deleting one would require scanning the VMap
+ // to update every entry that points at it. This would be really slow.
+ }
+
+ // Now that the inlined function body has been fully constructed, go through
+ // and zap unconditional fall-through branches. This happens all the time
+ // when specializing code: code specialization turns conditional branches
+ // into uncond branches, and this code folds them.
+ Function::iterator I = cast<BasicBlock>(VMap[&OldFunc->getEntryBlock()]);
+ while (I != NewFunc->end()) {
+ BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator());
+ if (!BI || BI->isConditional()) { ++I; continue; }
+
+ // Note that we can't eliminate uncond branches if the destination has
+ // single-entry PHI nodes. Eliminating the single-entry phi nodes would
+ // require scanning the VMap to update any entries that point to the phi
+ // node.
+ BasicBlock *Dest = BI->getSuccessor(0);
+ if (!Dest->getSinglePredecessor() || isa<PHINode>(Dest->begin())) {
+ ++I; continue;
+ }
+
+ // We know all single-entry PHI nodes in the inlined function have been
+ // removed, so we just need to splice the blocks.
+ BI->eraseFromParent();
+
+ // Make all PHI nodes that referred to Dest now refer to I as their source.
+ Dest->replaceAllUsesWith(I);
+
+ // Move all the instructions in the succ to the pred.
+ I->getInstList().splice(I->end(), Dest->getInstList());
+
+ // Remove the dest block.
+ Dest->eraseFromParent();
+
+ // Do not increment I; iteratively merge everything this block branches to.
+ } +} diff --git a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp new file mode 100644 index 0000000..a0e027b --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp @@ -0,0 +1,126 @@ +//===- CloneModule.cpp - Clone an entire module ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the CloneModule interface which makes a copy of an +// entire module. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Module.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Constant.h" +#include "llvm/Transforms/Utils/ValueMapper.h" +using namespace llvm; + +/// CloneModule - Return an exact copy of the specified module. This is not as +/// easy as it might seem because we have to worry about making copies of global +/// variables and functions, and making their (initializers and references, +/// respectively) refer to the right globals. +/// +Module *llvm::CloneModule(const Module *M) { + // Create the value map that maps things from the old module over to the new + // module. + ValueToValueMapTy VMap; + return CloneModule(M, VMap); +} + +Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { + // First off, we need to create the new module. + Module *New = new Module(M->getModuleIdentifier(), M->getContext()); + New->setDataLayout(M->getDataLayout()); + New->setTargetTriple(M->getTargetTriple()); + New->setModuleInlineAsm(M->getModuleInlineAsm()); + + // Copy all of the dependent libraries over. + for (Module::lib_iterator I = M->lib_begin(), E = M->lib_end(); I != E; ++I) + New->addLibrary(*I); + + // Loop over all of the global variables, making corresponding globals in the + // new module. Here we add them to the VMap and to the new Module. We + // don't worry about attributes or initializers, they will come later. + // + for (Module::const_global_iterator I = M->global_begin(), E = M->global_end(); + I != E; ++I) { + GlobalVariable *GV = new GlobalVariable(*New, + I->getType()->getElementType(), + I->isConstant(), I->getLinkage(), + (Constant*) 0, I->getName(), + (GlobalVariable*) 0, + I->isThreadLocal(), + I->getType()->getAddressSpace()); + GV->copyAttributesFrom(I); + VMap[I] = GV; + } + + // Loop over the functions in the module, making external functions as before + for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) { + Function *NF = + Function::Create(cast<FunctionType>(I->getType()->getElementType()), + I->getLinkage(), I->getName(), New); + NF->copyAttributesFrom(I); + VMap[I] = NF; + } + + // Loop over the aliases in the module + for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); + I != E; ++I) { + GlobalAlias *GA = new GlobalAlias(I->getType(), I->getLinkage(), + I->getName(), NULL, New); + GA->copyAttributesFrom(I); + VMap[I] = GA; + } + + // Now that all of the things that global variable initializer can refer to + // have been created, loop through and copy the global variable referrers + // over... We also set the attributes on the global now. 
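+ // (Creating every global before filling in any initializer lets an
+ // initializer reference globals that appear later in the module.)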
+ // + for (Module::const_global_iterator I = M->global_begin(), E = M->global_end(); + I != E; ++I) { + GlobalVariable *GV = cast<GlobalVariable>(VMap[I]); + if (I->hasInitializer()) + GV->setInitializer(MapValue(I->getInitializer(), VMap)); + } + + // Similarly, copy over function bodies now... + // + for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) { + Function *F = cast<Function>(VMap[I]); + if (!I->isDeclaration()) { + Function::arg_iterator DestI = F->arg_begin(); + for (Function::const_arg_iterator J = I->arg_begin(); J != I->arg_end(); + ++J) { + DestI->setName(J->getName()); + VMap[J] = DestI++; + } + + SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned. + CloneFunctionInto(F, I, VMap, /*ModuleLevelChanges=*/true, Returns); + } + } + + // And aliases + for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); + I != E; ++I) { + GlobalAlias *GA = cast<GlobalAlias>(VMap[I]); + if (const Constant *C = I->getAliasee()) + GA->setAliasee(MapValue(C, VMap)); + } + + // And named metadata.... + for (Module::const_named_metadata_iterator I = M->named_metadata_begin(), + E = M->named_metadata_end(); I != E; ++I) { + const NamedMDNode &NMD = *I; + NamedMDNode *NewNMD = New->getOrInsertNamedMetadata(NMD.getName()); + for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i) + NewNMD->addOperand(MapValue(NMD.getOperand(i), VMap)); + } + + return New; +} diff --git a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp new file mode 100644 index 0000000..5f47ebb --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -0,0 +1,796 @@ +//===- CodeExtractor.cpp - Pull code region into a new function -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the interface to tear out a code region, such as an +// individual loop or a parallel section, into a new function, replacing it with +// a call to the new function. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/FunctionUtils.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/StringExtras.h" +#include <algorithm> +#include <set> +using namespace llvm; + +// Provide a command-line option to aggregate function arguments into a struct +// for functions produced by the code extractor. This is useful when converting +// extracted functions to pthread-based code, as only one argument (void*) can +// be passed in to pthread_create(). 
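+//
+// A hypothetical use of the extractor defined below (names are illustrative;
+// external callers normally go through the FunctionUtils.h entry points):
+//   CodeExtractor CE(DT, /*AggArgs=*/true);
+//   Function *NewF = 0;
+//   if (CE.isEligible(Blocks))
+//     NewF = CE.ExtractCodeRegion(Blocks);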
+static cl::opt<bool> +AggregateArgsOpt("aggregate-extracted-args", cl::Hidden, + cl::desc("Aggregate arguments to code-extracted functions")); + +namespace { + class CodeExtractor { + typedef SetVector<Value*> Values; + SetVector<BasicBlock*> BlocksToExtract; + DominatorTree* DT; + bool AggregateArgs; + unsigned NumExitBlocks; + Type *RetTy; + public: + CodeExtractor(DominatorTree* dt = 0, bool AggArgs = false) + : DT(dt), AggregateArgs(AggArgs||AggregateArgsOpt), NumExitBlocks(~0U) {} + + Function *ExtractCodeRegion(ArrayRef<BasicBlock*> code); + + bool isEligible(ArrayRef<BasicBlock*> code); + + private: + /// definedInRegion - Return true if the specified value is defined in the + /// extracted region. + bool definedInRegion(Value *V) const { + if (Instruction *I = dyn_cast<Instruction>(V)) + if (BlocksToExtract.count(I->getParent())) + return true; + return false; + } + + /// definedInCaller - Return true if the specified value is defined in the + /// function being code extracted, but not in the region being extracted. + /// These values must be passed in as live-ins to the function. + bool definedInCaller(Value *V) const { + if (isa<Argument>(V)) return true; + if (Instruction *I = dyn_cast<Instruction>(V)) + if (!BlocksToExtract.count(I->getParent())) + return true; + return false; + } + + void severSplitPHINodes(BasicBlock *&Header); + void splitReturnBlocks(); + void findInputsOutputs(Values &inputs, Values &outputs); + + Function *constructFunction(const Values &inputs, + const Values &outputs, + BasicBlock *header, + BasicBlock *newRootNode, BasicBlock *newHeader, + Function *oldFunction, Module *M); + + void moveCodeToFunction(Function *newFunction); + + void emitCallAndSwitchStatement(Function *newFunction, + BasicBlock *newHeader, + Values &inputs, + Values &outputs); + + }; +} + +/// severSplitPHINodes - If a PHI node has multiple inputs from outside of the +/// region, we need to split the entry block of the region so that the PHI node +/// is easier to deal with. +void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { + unsigned NumPredsFromRegion = 0; + unsigned NumPredsOutsideRegion = 0; + + if (Header != &Header->getParent()->getEntryBlock()) { + PHINode *PN = dyn_cast<PHINode>(Header->begin()); + if (!PN) return; // No PHI nodes. + + // If the header node contains any PHI nodes, check to see if there is more + // than one entry from outside the region. If so, we need to sever the + // header block into two. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (BlocksToExtract.count(PN->getIncomingBlock(i))) + ++NumPredsFromRegion; + else + ++NumPredsOutsideRegion; + + // If there is one (or fewer) predecessor from outside the region, we don't + // need to do anything special. + if (NumPredsOutsideRegion <= 1) return; + } + + // Otherwise, we need to split the header block into two pieces: one + // containing PHI nodes merging values from outside of the region, and a + // second that contains all of the code for the block and merges back any + // incoming values from inside of the region. + BasicBlock::iterator AfterPHIs = Header->getFirstNonPHI(); + BasicBlock *NewBB = Header->splitBasicBlock(AfterPHIs, + Header->getName()+".ce"); + + // We only want to code extract the second block now, and it becomes the new + // header of the region. + BasicBlock *OldPred = Header; + BlocksToExtract.remove(OldPred); + BlocksToExtract.insert(NewBB); + Header = NewBB; + + // Okay, update dominator sets. 
The blocks that dominate the new one are the
+  // blocks that dominated the old header plus the new block itself.
+  if (DT)
+    DT->splitBlock(NewBB);
+
+  // Okay, now we need to adjust the PHI nodes and any branches from within the
+  // region to go to the new header block instead of the old header block.
+  if (NumPredsFromRegion) {
+    PHINode *PN = cast<PHINode>(OldPred->begin());
+    // Loop over all of the predecessors of OldPred that are in the region,
+    // changing them to branch to NewBB instead.
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+      if (BlocksToExtract.count(PN->getIncomingBlock(i))) {
+        TerminatorInst *TI = PN->getIncomingBlock(i)->getTerminator();
+        TI->replaceUsesOfWith(OldPred, NewBB);
+      }
+
+    // Okay, everything within the region is now branching to the right block;
+    // we just have to update the PHI nodes now, inserting PHI nodes into NewBB.
+    for (AfterPHIs = OldPred->begin(); isa<PHINode>(AfterPHIs); ++AfterPHIs) {
+      PHINode *PN = cast<PHINode>(AfterPHIs);
+      // Create a new PHI node in the new region, which has an incoming value
+      // from OldPred of PN.
+      PHINode *NewPN = PHINode::Create(PN->getType(), 1 + NumPredsFromRegion,
+                                       PN->getName()+".ce", NewBB->begin());
+      NewPN->addIncoming(PN, OldPred);
+
+      // Loop over all of the incoming values in PN, moving them to NewPN if
+      // they are from the extracted region.
+      for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
+        if (BlocksToExtract.count(PN->getIncomingBlock(i))) {
+          NewPN->addIncoming(PN->getIncomingValue(i), PN->getIncomingBlock(i));
+          PN->removeIncomingValue(i);
+          --i;
+        }
+      }
+    }
+  }
+}
+
+void CodeExtractor::splitReturnBlocks() {
+  for (SetVector<BasicBlock*>::iterator I = BlocksToExtract.begin(),
+         E = BlocksToExtract.end(); I != E; ++I)
+    if (ReturnInst *RI = dyn_cast<ReturnInst>((*I)->getTerminator())) {
+      BasicBlock *New = (*I)->splitBasicBlock(RI, (*I)->getName()+".ret");
+      if (DT) {
+        // Old dominates New. New node dominates all other nodes dominated
+        // by Old.
+        DomTreeNode *OldNode = DT->getNode(*I);
+        SmallVector<DomTreeNode*, 8> Children;
+        for (DomTreeNode::iterator DI = OldNode->begin(), DE = OldNode->end();
+             DI != DE; ++DI)
+          Children.push_back(*DI);
+
+        DomTreeNode *NewNode = DT->addNewBlock(New, *I);
+
+        for (SmallVector<DomTreeNode*, 8>::iterator I = Children.begin(),
+               E = Children.end(); I != E; ++I)
+          DT->changeImmediateDominator(*I, NewNode);
+      }
+    }
+}
+
+// findInputsOutputs - Find inputs to, outputs from the code region.
+//
+void CodeExtractor::findInputsOutputs(Values &inputs, Values &outputs) {
+  std::set<BasicBlock*> ExitBlocks;
+  for (SetVector<BasicBlock*>::const_iterator ci = BlocksToExtract.begin(),
+       ce = BlocksToExtract.end(); ci != ce; ++ci) {
+    BasicBlock *BB = *ci;
+
+    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+      // If a used value is defined outside the region, it's an input. If an
+      // instruction is used outside the region, it's an output.
+      for (User::op_iterator O = I->op_begin(), E = I->op_end(); O != E; ++O)
+        if (definedInCaller(*O))
+          inputs.insert(*O);
+
+      // Consider uses of this instruction (outputs).
+      for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+           UI != E; ++UI)
+        if (!definedInRegion(*UI)) {
+          outputs.insert(I);
+          break;
+        }
+    } // for: insts
+
+    // Keep track of the exit blocks from the region.
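The input/output rule stated in the comments above can be modeled in isolation. A minimal standalone sketch with invented names and values, not the LLVM data structures: a value used inside the region but defined outside is an input, and a value defined inside but used outside is an output.

    #include <cstdio>
    #include <set>
    #include <string>
    #include <utility>
    #include <vector>

    int main() {
      // Values defined inside the hypothetical region.
      std::set<std::string> definedInRegion = { "t1", "t2" };
      // (user, used value) pairs; "ret" lives outside the region.
      std::vector<std::pair<std::string, std::string> > uses = {
        { "t1",  "a"  },  // t1 = a + 1  -> 'a' defined outside: an input
        { "t2",  "t1" },  // t2 = t1 * 2 -> purely internal use
        { "ret", "t2" },  // used outside the region -> 't2' is an output
      };
      for (size_t i = 0; i != uses.size(); ++i) {
        if (definedInRegion.count(uses[i].first) &&
            !definedInRegion.count(uses[i].second))
          std::printf("input:  %s\n", uses[i].second.c_str());
        if (!definedInRegion.count(uses[i].first) &&
            definedInRegion.count(uses[i].second))
          std::printf("output: %s\n", uses[i].second.c_str());
      }
      return 0;
    }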
+    TerminatorInst *TI = BB->getTerminator();
+    for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+      if (!BlocksToExtract.count(TI->getSuccessor(i)))
+        ExitBlocks.insert(TI->getSuccessor(i));
+  } // for: basic blocks
+
+  NumExitBlocks = ExitBlocks.size();
+}
+
+/// constructFunction - make a function based on inputs and outputs, as follows:
+/// f(in0, ..., inN, out0, ..., outN)
+///
+Function *CodeExtractor::constructFunction(const Values &inputs,
+                                           const Values &outputs,
+                                           BasicBlock *header,
+                                           BasicBlock *newRootNode,
+                                           BasicBlock *newHeader,
+                                           Function *oldFunction,
+                                           Module *M) {
+  DEBUG(dbgs() << "inputs: " << inputs.size() << "\n");
+  DEBUG(dbgs() << "outputs: " << outputs.size() << "\n");
+
+  // This function returns an unsigned value; outputs are passed back by
+  // reference.
+  switch (NumExitBlocks) {
+  case 0:
+  case 1: RetTy = Type::getVoidTy(header->getContext()); break;
+  case 2: RetTy = Type::getInt1Ty(header->getContext()); break;
+  default: RetTy = Type::getInt16Ty(header->getContext()); break;
+  }
+
+  std::vector<Type*> paramTy;
+
+  // Add the types of the input values to the function's argument list.
+  for (Values::const_iterator i = inputs.begin(),
+       e = inputs.end(); i != e; ++i) {
+    const Value *value = *i;
+    DEBUG(dbgs() << "value used in func: " << *value << "\n");
+    paramTy.push_back(value->getType());
+  }
+
+  // Add the types of the output values to the function's argument list.
+  for (Values::const_iterator I = outputs.begin(), E = outputs.end();
+       I != E; ++I) {
+    DEBUG(dbgs() << "instr used in func: " << **I << "\n");
+    if (AggregateArgs)
+      paramTy.push_back((*I)->getType());
+    else
+      paramTy.push_back(PointerType::getUnqual((*I)->getType()));
+  }
+
+  DEBUG(dbgs() << "Function type: " << *RetTy << " f(");
+  for (std::vector<Type*>::iterator i = paramTy.begin(),
+       e = paramTy.end(); i != e; ++i)
+    DEBUG(dbgs() << **i << ", ");
+  DEBUG(dbgs() << ")\n");
+
+  if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
+    PointerType *StructPtr =
+      PointerType::getUnqual(StructType::get(M->getContext(), paramTy));
+    paramTy.clear();
+    paramTy.push_back(StructPtr);
+  }
+  FunctionType *funcType =
+    FunctionType::get(RetTy, paramTy, false);
+
+  // Create the new function.
+  Function *newFunction = Function::Create(funcType,
+                                           GlobalValue::InternalLinkage,
+                                           oldFunction->getName() + "_" +
+                                           header->getName(), M);
+  // If the old function is no-throw, so is the new one.
+  if (oldFunction->doesNotThrow())
+    newFunction->setDoesNotThrow(true);
+
+  newFunction->getBasicBlockList().push_back(newRootNode);
+
+  // Create an iterator to name all of the arguments we inserted.
+  Function::arg_iterator AI = newFunction->arg_begin();
+
+  // Rewrite all users of the inputs in the extracted region to use the
+  // arguments (or appropriate addressing into struct) instead.
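The return-type switch above follows a small encoding that the caller-side switch relies on later. Restated as a standalone helper, illustrative only:

    #include <cstdio>

    // Mirrors the RetTy switch above: the number of exits from the region
    // decides what the extracted function must return to the caller.
    static const char *retTypeFor(unsigned numExitBlocks) {
      switch (numExitBlocks) {
      case 0:
      case 1:  return "void";  // nothing to select
      case 2:  return "i1";    // a conditional branch can pick the exit
      default: return "i16";   // switch over exit indices
      }
    }

    int main() {
      for (unsigned n = 0; n != 4; ++n)
        std::printf("%u exit blocks -> %s\n", n, retTypeFor(n));
      return 0;
    }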
+  for (unsigned i = 0, e = inputs.size(); i != e; ++i) {
+    Value *RewriteVal;
+    if (AggregateArgs) {
+      Value *Idx[2];
+      Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext()));
+      Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i);
+      TerminatorInst *TI = newFunction->begin()->getTerminator();
+      GetElementPtrInst *GEP =
+        GetElementPtrInst::Create(AI, Idx, "gep_" + inputs[i]->getName(), TI);
+      RewriteVal = new LoadInst(GEP, "loadgep_" + inputs[i]->getName(), TI);
+    } else
+      RewriteVal = AI++;
+
+    std::vector<User*> Users(inputs[i]->use_begin(), inputs[i]->use_end());
+    for (std::vector<User*>::iterator use = Users.begin(), useE = Users.end();
+         use != useE; ++use)
+      if (Instruction* inst = dyn_cast<Instruction>(*use))
+        if (BlocksToExtract.count(inst->getParent()))
+          inst->replaceUsesOfWith(inputs[i], RewriteVal);
+  }
+
+  // Set names for input and output arguments.
+  if (!AggregateArgs) {
+    AI = newFunction->arg_begin();
+    for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++AI)
+      AI->setName(inputs[i]->getName());
+    for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++AI)
+      AI->setName(outputs[i]->getName()+".out");
+  }
+
+  // Rewrite branches to basic blocks outside of the loop to new dummy blocks
+  // within the new function. This must be done before we lose track of which
+  // blocks were originally in the code region.
+  std::vector<User*> Users(header->use_begin(), header->use_end());
+  for (unsigned i = 0, e = Users.size(); i != e; ++i)
+    // The BasicBlock which contains the branch is not in the region;
+    // modify the branch target to a new block.
+    if (TerminatorInst *TI = dyn_cast<TerminatorInst>(Users[i]))
+      if (!BlocksToExtract.count(TI->getParent()) &&
+          TI->getParent()->getParent() == oldFunction)
+        TI->replaceUsesOfWith(header, newHeader);
+
+  return newFunction;
+}
+
+/// FindPhiPredForUseInBlock - Given a value and a basic block, find a PHI
+/// that uses the value within the basic block, and return the predecessor
+/// block associated with that use, or return 0 if none is found.
+static BasicBlock* FindPhiPredForUseInBlock(Value* Used, BasicBlock* BB) {
+  for (Value::use_iterator UI = Used->use_begin(),
+       UE = Used->use_end(); UI != UE; ++UI) {
+    PHINode *P = dyn_cast<PHINode>(*UI);
+    if (P && P->getParent() == BB)
+      return P->getIncomingBlock(UI);
+  }
+
+  return 0;
+}
+
+/// emitCallAndSwitchStatement - This method sets up the caller side by adding
+/// the call instruction, splitting any PHI nodes in the header block as
+/// necessary.
+void CodeExtractor::
+emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
+                           Values &inputs, Values &outputs) {
+  // Emit a call to the new function, passing in: a pointer to the struct (if
+  // aggregating parameters), or plain inputs and allocated memory for outputs.
+  std::vector<Value*> params, StructValues, ReloadOutputs, Reloads;
+
+  LLVMContext &Context = newFunction->getContext();
+
+  // Add inputs as params, or to be filled into the struct.
+  for (Values::iterator i = inputs.begin(), e = inputs.end(); i != e; ++i)
+    if (AggregateArgs)
+      StructValues.push_back(*i);
+    else
+      params.push_back(*i);
+
+  // Create allocas for the outputs.
+  for (Values::iterator i = outputs.begin(), e = outputs.end(); i != e; ++i) {
+    if (AggregateArgs) {
+      StructValues.push_back(*i);
+    } else {
+      AllocaInst *alloca =
+        new AllocaInst((*i)->getType(), 0, (*i)->getName()+".loc",
+                       codeReplacer->getParent()->begin()->begin());
+      ReloadOutputs.push_back(alloca);
+      params.push_back(alloca);
+    }
+  }
+
+  AllocaInst *Struct = 0;
+  if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
+    std::vector<Type*> ArgTypes;
+    for (Values::iterator v = StructValues.begin(),
+         ve = StructValues.end(); v != ve; ++v)
+      ArgTypes.push_back((*v)->getType());
+
+    // Allocate a struct at the beginning of this function.
+    Type *StructArgTy = StructType::get(newFunction->getContext(), ArgTypes);
+    Struct =
+      new AllocaInst(StructArgTy, 0, "structArg",
+                     codeReplacer->getParent()->begin()->begin());
+    params.push_back(Struct);
+
+    for (unsigned i = 0, e = inputs.size(); i != e; ++i) {
+      Value *Idx[2];
+      Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+      Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i);
+      GetElementPtrInst *GEP =
+        GetElementPtrInst::Create(Struct, Idx,
+                                  "gep_" + StructValues[i]->getName());
+      codeReplacer->getInstList().push_back(GEP);
+      StoreInst *SI = new StoreInst(StructValues[i], GEP);
+      codeReplacer->getInstList().push_back(SI);
+    }
+  }
+
+  // Emit the call to the function.
+  CallInst *call = CallInst::Create(newFunction, params,
+                                    NumExitBlocks > 1 ? "targetBlock" : "");
+  codeReplacer->getInstList().push_back(call);
+
+  Function::arg_iterator OutputArgBegin = newFunction->arg_begin();
+  unsigned FirstOut = inputs.size();
+  if (!AggregateArgs)
+    std::advance(OutputArgBegin, inputs.size());
+
+  // Reload the outputs passed in by reference.
+  for (unsigned i = 0, e = outputs.size(); i != e; ++i) {
+    Value *Output = 0;
+    if (AggregateArgs) {
+      Value *Idx[2];
+      Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+      Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i);
+      GetElementPtrInst *GEP
+        = GetElementPtrInst::Create(Struct, Idx,
+                                    "gep_reload_" + outputs[i]->getName());
+      codeReplacer->getInstList().push_back(GEP);
+      Output = GEP;
+    } else {
+      Output = ReloadOutputs[i];
+    }
+    LoadInst *load = new LoadInst(Output, outputs[i]->getName()+".reload");
+    Reloads.push_back(load);
+    codeReplacer->getInstList().push_back(load);
+    std::vector<User*> Users(outputs[i]->use_begin(), outputs[i]->use_end());
+    for (unsigned u = 0, e = Users.size(); u != e; ++u) {
+      Instruction *inst = cast<Instruction>(Users[u]);
+      if (!BlocksToExtract.count(inst->getParent()))
+        inst->replaceUsesOfWith(outputs[i], load);
+    }
+  }
+
+  // Now we can emit a switch statement using the call as a value.
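Before the switch is built, the caller-side protocol assembled above can be seen as ordinary code. A hedged C++ analogue of the non-aggregate path, with all names hypothetical:

    #include <cstdio>

    // Models the extracted function: outputs come back through pointers, and
    // the return value says which exit block was taken.
    static short f_header(int a, int *x_out) {
      *x_out = a + 1;
      return a > 0 ? 0 : 1;
    }

    int main() {
      int x_loc = 0;                        // the ".loc" output alloca
      short target = f_header(41, &x_loc);  // the emitted call
      int x = x_loc;                        // the ".reload" load
      switch (target) {                     // stands in for the SwitchInst
      case 0:  std::printf("exit 0, x = %d\n", x); break;
      default: std::printf("exit 1, x = %d\n", x); break;
      }
      return 0;
    }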
+  SwitchInst *TheSwitch =
+    SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)),
+                       codeReplacer, 0, codeReplacer);
+
+  // Since there may be multiple exits from the original region, make the new
+  // function return an unsigned value and switch on that number. This loop
+  // iterates over all of the blocks in the extracted region, updating any
+  // terminator instructions in the to-be-extracted region that branch to
+  // blocks that are not in the region to be extracted.
+  std::map<BasicBlock*, BasicBlock*> ExitBlockMap;
+
+  unsigned switchVal = 0;
+  for (SetVector<BasicBlock*>::const_iterator i = BlocksToExtract.begin(),
+       e = BlocksToExtract.end(); i != e; ++i) {
+    TerminatorInst *TI = (*i)->getTerminator();
+    for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+      if (!BlocksToExtract.count(TI->getSuccessor(i))) {
+        BasicBlock *OldTarget = TI->getSuccessor(i);
+        // Add a new basic block which returns the appropriate value.
+        BasicBlock *&NewTarget = ExitBlockMap[OldTarget];
+        if (!NewTarget) {
+          // If we don't already have an exit stub for this non-extracted
+          // destination, create one now!
+          NewTarget = BasicBlock::Create(Context,
+                                         OldTarget->getName() + ".exitStub",
+                                         newFunction);
+          unsigned SuccNum = switchVal++;
+
+          Value *brVal = 0;
+          switch (NumExitBlocks) {
+          case 0:
+          case 1: break;  // No value needed.
+          case 2:         // Conditional branch, return a bool.
+            brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum);
+            break;
+          default:
+            brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum);
+            break;
+          }
+
+          ReturnInst *NTRet = ReturnInst::Create(Context, brVal, NewTarget);
+
+          // Update the switch instruction.
+          TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context),
+                                              SuccNum),
+                             OldTarget);
+
+          // Restore values just before we exit.
+          Function::arg_iterator OAI = OutputArgBegin;
+          for (unsigned out = 0, e = outputs.size(); out != e; ++out) {
+            // For an invoke, the normal destination is the only one that is
+            // dominated by the result of the invocation.
+            BasicBlock *DefBlock = cast<Instruction>(outputs[out])->getParent();
+
+            bool DominatesDef = true;
+
+            if (InvokeInst *Invoke = dyn_cast<InvokeInst>(outputs[out])) {
+              DefBlock = Invoke->getNormalDest();
+
+              // Make sure we are looking at the original successor block, not
+              // at a newly inserted exit block, which won't be in the dominator
+              // info.
+              for (std::map<BasicBlock*, BasicBlock*>::iterator I =
+                     ExitBlockMap.begin(), E = ExitBlockMap.end(); I != E; ++I)
+                if (DefBlock == I->second) {
+                  DefBlock = I->first;
+                  break;
+                }
+
+              // In the extract block case, if the block we are extracting ends
+              // with an invoke instruction, make sure that we don't emit a
+              // store of the invoke value for the unwind block.
+              if (!DT && DefBlock != OldTarget)
+                DominatesDef = false;
+            }
+
+            if (DT) {
+              DominatesDef = DT->dominates(DefBlock, OldTarget);
+
+              // If the output value is used by a phi in the target block, then
+              // we need to test for dominance of the phi's predecessor instead.
+              // Unfortunately, this is a little complicated since we have
+              // already rewritten uses of the value to uses of the reload.
+              BasicBlock* pred = FindPhiPredForUseInBlock(Reloads[out],
+                                                          OldTarget);
+              if (pred && DT && DT->dominates(DefBlock, pred))
+                DominatesDef = true;
+            }
+
+            if (DominatesDef) {
+              if (AggregateArgs) {
+                Value *Idx[2];
+                Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+                Idx[1] = ConstantInt::get(Type::getInt32Ty(Context),
+                                          FirstOut+out);
+                GetElementPtrInst *GEP =
+                  GetElementPtrInst::Create(OAI, Idx,
+                                            "gep_" + outputs[out]->getName(),
+                                            NTRet);
+                new StoreInst(outputs[out], GEP, NTRet);
+              } else {
+                new StoreInst(outputs[out], OAI, NTRet);
+              }
+            }
+            // Advance the output iterator even if we don't emit a store.
+            if (!AggregateArgs) ++OAI;
+          }
+        }
+
+        // Rewrite the original branch instruction with this new target.
+        TI->setSuccessor(i, NewTarget);
+      }
+  }
+
+  // Now that we've done the deed, simplify the switch instruction.
+  Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType();
+  switch (NumExitBlocks) {
+  case 0:
+    // There are no successors (the block containing the switch itself), which
+    // means that previously this was the last part of the function, and hence
+    // this should be rewritten as a `ret'.
+
+    // Check if the function should return a value.
+    if (OldFnRetTy->isVoidTy()) {
+      ReturnInst::Create(Context, 0, TheSwitch);  // Return void
+    } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) {
+      // return what we have
+      ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch);
+    } else {
+      // Otherwise, we must have extracted code containing an unwind or
+      // something; just return whatever we want.
+      ReturnInst::Create(Context,
+                         Constant::getNullValue(OldFnRetTy), TheSwitch);
+    }
+
+    TheSwitch->eraseFromParent();
+    break;
+  case 1:
+    // Only a single destination, change the switch into an unconditional
+    // branch.
+    BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch);
+    TheSwitch->eraseFromParent();
+    break;
+  case 2:
+    BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2),
+                       call, TheSwitch);
+    TheSwitch->eraseFromParent();
+    break;
+  default:
+    // Otherwise, make the default destination of the switch instruction be one
+    // of the other successors.
+    TheSwitch->setOperand(0, call);
+    TheSwitch->setSuccessor(0, TheSwitch->getSuccessor(NumExitBlocks));
+    TheSwitch->removeCase(NumExitBlocks);  // Remove redundant case
+    break;
+  }
+}
+
+void CodeExtractor::moveCodeToFunction(Function *newFunction) {
+  Function *oldFunc = (*BlocksToExtract.begin())->getParent();
+  Function::BasicBlockListType &oldBlocks = oldFunc->getBasicBlockList();
+  Function::BasicBlockListType &newBlocks = newFunction->getBasicBlockList();
+
+  for (SetVector<BasicBlock*>::const_iterator i = BlocksToExtract.begin(),
+       e = BlocksToExtract.end(); i != e; ++i) {
+    // Delete the basic block from the old function, and the list of blocks.
+    oldBlocks.remove(*i);
+
+    // Insert this basic block into the new function.
+    newBlocks.push_back(*i);
+  }
+}
+
+/// ExtractCodeRegion - Removes a code region from a function, replacing it
+/// with a call to the new function. Returns a pointer to the new function.
+///
+/// algorithm:
+///
+/// find inputs and outputs for the region
+///
+/// for inputs: add to function as args, map input instr* to arg#
+/// for outputs: add allocas for scalars,
+///              add to func as args, map output instr* to arg#
+///
+/// rewrite func to use argument #s instead of instr*
+///
+/// for each scalar output in the function: at every exit, store intermediate
+/// computed result back into memory.
+///
+Function *CodeExtractor::
+ExtractCodeRegion(ArrayRef<BasicBlock*> code) {
+  if (!isEligible(code))
+    return 0;
+
+  // 1) Find inputs, outputs
+  // 2) Construct new function
+  //  * Add allocas for defs, pass as args by reference
+  //  * Pass in uses as args
+  // 3) Move code region, add call instr to func
+  //
+  BlocksToExtract.insert(code.begin(), code.end());
+
+  Values inputs, outputs;
+
+  // Assumption: this is a single-entry code region, and the header is the
+  // first block in the region.
+  BasicBlock *header = code[0];
+
+  for (unsigned i = 1, e = code.size(); i != e; ++i)
+    for (pred_iterator PI = pred_begin(code[i]), E = pred_end(code[i]);
+         PI != E; ++PI)
+      assert(BlocksToExtract.count(*PI) &&
+             "No blocks in this region may have entries from outside the region"
+             " except for the first block!");
+
+  // If we have to split PHI nodes or the entry block, do so now.
+  severSplitPHINodes(header);
+
+  // If we have any return instructions in the region, split those blocks so
+  // that the return is not in the region.
+  splitReturnBlocks();
+
+  Function *oldFunction = header->getParent();
+
+  // This takes the place of the original loop.
+  BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(),
+                                                "codeRepl", oldFunction,
+                                                header);
+
+  // The new function needs a root node because other nodes can branch to the
+  // head of the region, but the entry node of a function cannot have preds.
+  BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(),
+                                               "newFuncRoot");
+  newFuncRoot->getInstList().push_back(BranchInst::Create(header));
+
+  // Find inputs to, outputs from the code region.
+  findInputsOutputs(inputs, outputs);
+
+  // Construct new function based on inputs/outputs & add allocas for all defs.
+  Function *newFunction = constructFunction(inputs, outputs, header,
+                                            newFuncRoot,
+                                            codeReplacer, oldFunction,
+                                            oldFunction->getParent());
+
+  emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs);
+
+  moveCodeToFunction(newFunction);
+
+  // Loop over all of the PHI nodes in the header block, and change any
+  // references to the old incoming edge to be the new incoming edge.
+  for (BasicBlock::iterator I = header->begin(); isa<PHINode>(I); ++I) {
+    PHINode *PN = cast<PHINode>(I);
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+      if (!BlocksToExtract.count(PN->getIncomingBlock(i)))
+        PN->setIncomingBlock(i, newFuncRoot);
+  }
+
+  // Look at all successors of the codeReplacer block. If any of these blocks
+  // had PHI nodes in them, we need to update the "from" block to be the code
+  // replacer, not the original block in the extracted region.
+  std::vector<BasicBlock*> Succs(succ_begin(codeReplacer),
+                                 succ_end(codeReplacer));
+  for (unsigned i = 0, e = Succs.size(); i != e; ++i)
+    for (BasicBlock::iterator I = Succs[i]->begin(); isa<PHINode>(I); ++I) {
+      PHINode *PN = cast<PHINode>(I);
+      std::set<BasicBlock*> ProcessedPreds;
+      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+        if (BlocksToExtract.count(PN->getIncomingBlock(i))) {
+          if (ProcessedPreds.insert(PN->getIncomingBlock(i)).second)
+            PN->setIncomingBlock(i, codeReplacer);
+          else {
+            // There were multiple entries in the PHI for this block; now there
+            // is only one, so remove the duplicated entries.
+            PN->removeIncomingValue(i, false);
+            --i; --e;
+          }
+        }
+    }
+
+  //cerr << "NEW FUNCTION: " << *newFunction;
+  //  verifyFunction(*newFunction);
+
+  //  cerr << "OLD FUNCTION: " << *oldFunction;
+  //  verifyFunction(*oldFunction);
+
+  DEBUG(if (verifyFunction(*newFunction))
+          report_fatal_error("verifyFunction failed!"));
+  return newFunction;
+}
+
+bool CodeExtractor::isEligible(ArrayRef<BasicBlock*> code) {
+  // Deny a single basic block that's a landing pad block.
+  if (code.size() == 1 && code[0]->isLandingPad())
+    return false;
+
+  // Deny code region if it contains allocas or vastarts.
+  for (ArrayRef<BasicBlock*>::iterator BB = code.begin(), e = code.end();
+       BB != e; ++BB)
+    for (BasicBlock::const_iterator I = (*BB)->begin(), Ie = (*BB)->end();
+         I != Ie; ++I)
+      if (isa<AllocaInst>(*I))
+        return false;
+      else if (const CallInst *CI = dyn_cast<CallInst>(I))
+        if (const Function *F = CI->getCalledFunction())
+          if (F->getIntrinsicID() == Intrinsic::vastart)
+            return false;
+  return true;
+}
+
+
+/// ExtractCodeRegion - Slurp a sequence of basic blocks into a brand new
+/// function.
+///
+Function* llvm::ExtractCodeRegion(DominatorTree &DT,
+                                  ArrayRef<BasicBlock*> code,
+                                  bool AggregateArgs) {
+  return CodeExtractor(&DT, AggregateArgs).ExtractCodeRegion(code);
+}
+
+/// ExtractLoop - Slurp a natural loop into a brand new function.
+///
+Function* llvm::ExtractLoop(DominatorTree &DT, Loop *L, bool AggregateArgs) {
+  return CodeExtractor(&DT, AggregateArgs).ExtractCodeRegion(L->getBlocks());
+}
+
+/// ExtractBasicBlock - Slurp a basic block into a brand new function.
+///
+Function* llvm::ExtractBasicBlock(ArrayRef<BasicBlock*> BBs,
+                                  bool AggregateArgs) {
+  return CodeExtractor(0, AggregateArgs).ExtractCodeRegion(BBs);
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
new file mode 100644
index 0000000..8cc2649
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -0,0 +1,146 @@
+//===- DemoteRegToStack.cpp - Move a virtual register to the stack --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the function DemoteRegToStack(). This function takes a
+// virtual register computed by an Instruction and replaces it with a slot in
+// the stack frame, allocated via alloca. It returns the pointer to the
+// AllocaInst inserted. After this function is called on an instruction, we are
+// guaranteed that the only user of the instruction is a store that is
+// immediately after it.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Type.h"
+#include <map>
+using namespace llvm;
+
+/// DemoteRegToStack - This function takes a virtual register computed by an
+/// Instruction and replaces it with a slot in the stack frame, allocated via
+/// alloca. This allows the CFG to be changed around without fear of
+/// invalidating the SSA information for the value. It returns the pointer to
+/// the alloca inserted to create a stack slot for I.
+///
+AllocaInst* llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
+                                   Instruction *AllocaPoint) {
+  if (I.use_empty()) {
+    I.eraseFromParent();
+    return 0;
+  }
+
+  // Create a stack slot to hold the value.
+  AllocaInst *Slot;
+  if (AllocaPoint) {
+    Slot = new AllocaInst(I.getType(), 0,
+                          I.getName()+".reg2mem", AllocaPoint);
+  } else {
+    Function *F = I.getParent()->getParent();
+    Slot = new AllocaInst(I.getType(), 0, I.getName()+".reg2mem",
+                          F->getEntryBlock().begin());
+  }
+
+  // Change all of the users of the instruction to read from the stack slot
+  // instead.
+  while (!I.use_empty()) {
+    Instruction *U = cast<Instruction>(I.use_back());
+    if (PHINode *PN = dyn_cast<PHINode>(U)) {
+      // If this is a PHI node, we can't insert a load of the value before the
+      // use. Instead, insert the load in the predecessor block corresponding
+      // to the incoming value.
+      //
+      // Note that if there are multiple edges from a basic block to this PHI
+      // node, we cannot insert multiple loads. The problem is that the
+      // resultant PHI node will have multiple values (from each load) coming
+      // in from the same block, which is illegal SSA form. For this reason,
+      // we keep track of and reuse the loads we insert.
+      std::map<BasicBlock*, Value*> Loads;
+      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+        if (PN->getIncomingValue(i) == &I) {
+          Value *&V = Loads[PN->getIncomingBlock(i)];
+          if (V == 0) {
+            // Insert the load into the predecessor block.
+            V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads,
+                             PN->getIncomingBlock(i)->getTerminator());
+          }
+          PN->setIncomingValue(i, V);
+        }
+
+    } else {
+      // If this is a normal instruction, just insert a load.
+      Value *V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads, U);
+      U->replaceUsesOfWith(&I, V);
+    }
+  }
+
+
+  // Insert stores of the computed value into the stack slot. We have to be
+  // careful if I is an invoke instruction, though, because we can't insert
+  // the store AFTER the terminator instruction.
+  BasicBlock::iterator InsertPt;
+  if (!isa<TerminatorInst>(I)) {
+    InsertPt = &I;
+    ++InsertPt;
+  } else {
+    // We cannot demote invoke instructions to the stack if their normal edge
+    // is critical.
+    InvokeInst &II = cast<InvokeInst>(I);
+    assert(II.getNormalDest()->getSinglePredecessor() &&
+           "Cannot demote invoke with a critical successor!");
+    InsertPt = II.getNormalDest()->begin();
+  }
+
+  for (; isa<PHINode>(InsertPt); ++InsertPt)
+    /* empty */;   // Don't insert before any PHI nodes.
+  new StoreInst(&I, Slot, InsertPt);
+
+  return Slot;
+}
+
+
+/// DemotePHIToStack - This function takes a virtual register computed by a phi
+/// node and replaces it with a slot in the stack frame, allocated via alloca.
+/// The phi node is deleted and it returns the pointer to the alloca inserted.
+AllocaInst* llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) {
+  if (P->use_empty()) {
+    P->eraseFromParent();
+    return 0;
+  }
+
+  // Create a stack slot to hold the value.
+  AllocaInst *Slot;
+  if (AllocaPoint) {
+    Slot = new AllocaInst(P->getType(), 0,
+                          P->getName()+".reg2mem", AllocaPoint);
+  } else {
+    Function *F = P->getParent()->getParent();
+    Slot = new AllocaInst(P->getType(), 0, P->getName()+".reg2mem",
+                          F->getEntryBlock().begin());
+  }
+
+  // Iterate over each operand, inserting a store in each predecessor.
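Stepping back from the PHI case for a moment, the overall reg-to-stack transformation has a simple C++ analogue. This is illustrative only; the real work happens on IR values, not C++ variables:

    #include <cstdio>

    int main() {
      int slot;          // plays the role of the ".reg2mem" alloca
      int a = 6, b = 7;
      slot = a * b;      // store placed immediately after the definition
      int use1 = slot;   // every former use becomes a ".reload" load
      int use2 = slot;
      std::printf("%d %d\n", use1, use2);
      return 0;
    }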
+  for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) {
+    if (InvokeInst *II = dyn_cast<InvokeInst>(P->getIncomingValue(i))) {
+      assert(II->getParent() != P->getIncomingBlock(i) &&
+             "Invoke edge not supported yet"); (void)II;
+    }
+    new StoreInst(P->getIncomingValue(i), Slot,
+                  P->getIncomingBlock(i)->getTerminator());
+  }
+
+  // Insert a load in place of the phi and replace all uses.
+  Value *V = new LoadInst(Slot, P->getName()+".reload", P);
+  P->replaceAllUsesWith(V);
+
+  // Delete the phi.
+  P->eraseFromParent();
+
+  return Slot;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
new file mode 100644
index 0000000..5464dbc
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -0,0 +1,1318 @@
+//===- InlineFunction.cpp - Code to perform function inlining -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements inlining of a function into a call site, resolving
+// parameters and the return value as appropriate.
+//
+// The code in this file for handling inlines through invoke
+// instructions preserves semantics only under some assumptions about
+// the behavior of unwinders which correspond to gcc-style libUnwind
+// exception personality functions. Eventually the IR will be
+// improved to make this unnecessary, but until then, this code is
+// marked [LIBUNWIND].
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Attributes.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/IRBuilder.h"
+using namespace llvm;
+
+bool llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI) {
+  return InlineFunction(CallSite(CI), IFI);
+}
+bool llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI) {
+  return InlineFunction(CallSite(II), IFI);
+}
+
+// FIXME: New EH - Remove the functions marked [LIBUNWIND] when new EH is
+// turned on.
+
+/// [LIBUNWIND] Look for an llvm.eh.exception call in the given block.
+static EHExceptionInst *findExceptionInBlock(BasicBlock *bb) {
+  for (BasicBlock::iterator i = bb->begin(), e = bb->end(); i != e; i++) {
+    EHExceptionInst *exn = dyn_cast<EHExceptionInst>(i);
+    if (exn) return exn;
+  }
+
+  return 0;
+}
+
+/// [LIBUNWIND] Look for the 'best' llvm.eh.selector instruction for
+/// the given llvm.eh.exception call.
+static EHSelectorInst *findSelectorForException(EHExceptionInst *exn) {
+  BasicBlock *exnBlock = exn->getParent();
+
+  EHSelectorInst *outOfBlockSelector = 0;
+  for (Instruction::use_iterator
+         ui = exn->use_begin(), ue = exn->use_end(); ui != ue; ++ui) {
+    EHSelectorInst *sel = dyn_cast<EHSelectorInst>(*ui);
+    if (!sel) continue;
+
+    // Immediately accept an eh.selector in the same block as the
+    // exception call.
+ if (sel->getParent() == exnBlock) return sel; + + // Otherwise, use the first selector we see. + if (!outOfBlockSelector) outOfBlockSelector = sel; + } + + return outOfBlockSelector; +} + +/// [LIBUNWIND] Find the (possibly absent) call to @llvm.eh.selector +/// in the given landing pad. In principle, llvm.eh.exception is +/// required to be in the landing pad; in practice, SplitCriticalEdge +/// can break that invariant, and then inlining can break it further. +/// There's a real need for a reliable solution here, but until that +/// happens, we have some fragile workarounds here. +static EHSelectorInst *findSelectorForLandingPad(BasicBlock *lpad) { + // Look for an exception call in the actual landing pad. + EHExceptionInst *exn = findExceptionInBlock(lpad); + if (exn) return findSelectorForException(exn); + + // Okay, if that failed, look for one in an obvious successor. If + // we find one, we'll fix the IR by moving things back to the + // landing pad. + + bool dominates = true; // does the lpad dominate the exn call + BasicBlock *nonDominated = 0; // if not, the first non-dominated block + BasicBlock *lastDominated = 0; // and the block which branched to it + + BasicBlock *exnBlock = lpad; + + // We need to protect against lpads that lead into infinite loops. + SmallPtrSet<BasicBlock*,4> visited; + visited.insert(exnBlock); + + do { + // We're not going to apply this hack to anything more complicated + // than a series of unconditional branches, so if the block + // doesn't terminate in an unconditional branch, just fail. More + // complicated cases can arise when, say, sinking a call into a + // split unwind edge and then inlining it; but that can do almost + // *anything* to the CFG, including leaving the selector + // completely unreachable. The only way to fix that properly is + // to (1) prohibit transforms which move the exception or selector + // values away from the landing pad, e.g. by producing them with + // instructions that are pinned to an edge like a phi, or + // producing them with not-really-instructions, and (2) making + // transforms which split edges deal with that. + BranchInst *branch = dyn_cast<BranchInst>(&exnBlock->back()); + if (!branch || branch->isConditional()) return 0; + + BasicBlock *successor = branch->getSuccessor(0); + + // Fail if we found an infinite loop. + if (!visited.insert(successor)) return 0; + + // If the successor isn't dominated by exnBlock: + if (!successor->getSinglePredecessor()) { + // We don't want to have to deal with threading the exception + // through multiple levels of phi, so give up if we've already + // followed a non-dominating edge. + if (!dominates) return 0; + + // Otherwise, remember this as a non-dominating edge. + dominates = false; + nonDominated = successor; + lastDominated = exnBlock; + } + + exnBlock = successor; + + // Can we stop here? + exn = findExceptionInBlock(exnBlock); + } while (!exn); + + // Look for a selector call for the exception we found. + EHSelectorInst *selector = findSelectorForException(exn); + if (!selector) return 0; + + // The easy case is when the landing pad still dominates the + // exception call, in which case we can just move both calls back to + // the landing pad. + if (dominates) { + selector->moveBefore(lpad->getFirstNonPHI()); + exn->moveBefore(selector); + return selector; + } + + // Otherwise, we have to split at the first non-dominating block. 
+ // The CFG looks basically like this: + // lpad: + // phis_0 + // insnsAndBranches_1 + // br label %nonDominated + // nonDominated: + // phis_2 + // insns_3 + // %exn = call i8* @llvm.eh.exception() + // insnsAndBranches_4 + // %selector = call @llvm.eh.selector(i8* %exn, ... + // We need to turn this into: + // lpad: + // phis_0 + // %exn0 = call i8* @llvm.eh.exception() + // %selector0 = call @llvm.eh.selector(i8* %exn0, ... + // insnsAndBranches_1 + // br label %split // from lastDominated + // nonDominated: + // phis_2 (without edge from lastDominated) + // %exn1 = call i8* @llvm.eh.exception() + // %selector1 = call i8* @llvm.eh.selector(i8* %exn1, ... + // br label %split + // split: + // phis_2 (edge from lastDominated, edge from split) + // %exn = phi ... + // %selector = phi ... + // insns_3 + // insnsAndBranches_4 + + assert(nonDominated); + assert(lastDominated); + + // First, make clones of the intrinsics to go in lpad. + EHExceptionInst *lpadExn = cast<EHExceptionInst>(exn->clone()); + EHSelectorInst *lpadSelector = cast<EHSelectorInst>(selector->clone()); + lpadSelector->setArgOperand(0, lpadExn); + lpadSelector->insertBefore(lpad->getFirstNonPHI()); + lpadExn->insertBefore(lpadSelector); + + // Split the non-dominated block. + BasicBlock *split = + nonDominated->splitBasicBlock(nonDominated->getFirstNonPHI(), + nonDominated->getName() + ".lpad-fix"); + + // Redirect the last dominated branch there. + cast<BranchInst>(lastDominated->back()).setSuccessor(0, split); + + // Move the existing intrinsics to the end of the old block. + selector->moveBefore(&nonDominated->back()); + exn->moveBefore(selector); + + Instruction *splitIP = &split->front(); + + // For all the phis in nonDominated, make a new phi in split to join + // that phi with the edge from lastDominated. + for (BasicBlock::iterator + i = nonDominated->begin(), e = nonDominated->end(); i != e; ++i) { + PHINode *phi = dyn_cast<PHINode>(i); + if (!phi) break; + + PHINode *splitPhi = PHINode::Create(phi->getType(), 2, phi->getName(), + splitIP); + phi->replaceAllUsesWith(splitPhi); + splitPhi->addIncoming(phi, nonDominated); + splitPhi->addIncoming(phi->removeIncomingValue(lastDominated), + lastDominated); + } + + // Make new phis for the exception and selector. + PHINode *exnPhi = PHINode::Create(exn->getType(), 2, "", splitIP); + exn->replaceAllUsesWith(exnPhi); + selector->setArgOperand(0, exn); // except for this use + exnPhi->addIncoming(exn, nonDominated); + exnPhi->addIncoming(lpadExn, lastDominated); + + PHINode *selectorPhi = PHINode::Create(selector->getType(), 2, "", splitIP); + selector->replaceAllUsesWith(selectorPhi); + selectorPhi->addIncoming(selector, nonDominated); + selectorPhi->addIncoming(lpadSelector, lastDominated); + + return lpadSelector; +} + +namespace { + /// A class for recording information about inlining through an invoke. + class InvokeInliningInfo { + BasicBlock *OuterUnwindDest; + EHSelectorInst *OuterSelector; + BasicBlock *InnerUnwindDest; + PHINode *InnerExceptionPHI; + PHINode *InnerSelectorPHI; + SmallVector<Value*, 8> UnwindDestPHIValues; + + // FIXME: New EH - These will replace the analogous ones above. + BasicBlock *OuterResumeDest; //< Destination of the invoke's unwind. + BasicBlock *InnerResumeDest; //< Destination for the callee's resume. + LandingPadInst *CallerLPad; //< LandingPadInst associated with the invoke. + PHINode *InnerEHValuesPHI; //< PHI for EH values from landingpad insts. 
+ + public: + InvokeInliningInfo(InvokeInst *II) + : OuterUnwindDest(II->getUnwindDest()), OuterSelector(0), + InnerUnwindDest(0), InnerExceptionPHI(0), InnerSelectorPHI(0), + OuterResumeDest(II->getUnwindDest()), InnerResumeDest(0), + CallerLPad(0), InnerEHValuesPHI(0) { + // If there are PHI nodes in the unwind destination block, we need to keep + // track of which values came into them from the invoke before removing + // the edge from this block. + llvm::BasicBlock *InvokeBB = II->getParent(); + BasicBlock::iterator I = OuterUnwindDest->begin(); + for (; isa<PHINode>(I); ++I) { + // Save the value to use for this edge. + PHINode *PHI = cast<PHINode>(I); + UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB)); + } + + // FIXME: With the new EH, this if/dyn_cast should be a 'cast'. + if (LandingPadInst *LPI = dyn_cast<LandingPadInst>(I)) { + CallerLPad = LPI; + } + } + + /// The outer unwind destination is the target of unwind edges + /// introduced for calls within the inlined function. + BasicBlock *getOuterUnwindDest() const { + return OuterUnwindDest; + } + + EHSelectorInst *getOuterSelector() { + if (!OuterSelector) + OuterSelector = findSelectorForLandingPad(OuterUnwindDest); + return OuterSelector; + } + + BasicBlock *getInnerUnwindDest(); + + // FIXME: New EH - Rename when new EH is turned on. + BasicBlock *getInnerUnwindDestNewEH(); + + LandingPadInst *getLandingPadInst() const { return CallerLPad; } + + bool forwardEHResume(CallInst *call, BasicBlock *src); + + /// forwardResume - Forward the 'resume' instruction to the caller's landing + /// pad block. When the landing pad block has only one predecessor, this is + /// a simple branch. When there is more than one predecessor, we need to + /// split the landing pad block after the landingpad instruction and jump + /// to there. + void forwardResume(ResumeInst *RI); + + /// addIncomingPHIValuesFor - Add incoming-PHI values to the unwind + /// destination block for the given basic block, using the values for the + /// original invoke's source block. + void addIncomingPHIValuesFor(BasicBlock *BB) const { + addIncomingPHIValuesForInto(BB, OuterUnwindDest); + } + + void addIncomingPHIValuesForInto(BasicBlock *src, BasicBlock *dest) const { + BasicBlock::iterator I = dest->begin(); + for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) { + PHINode *phi = cast<PHINode>(I); + phi->addIncoming(UnwindDestPHIValues[i], src); + } + } + }; +} + +/// [LIBUNWIND] Get or create a target for the branch out of rewritten calls to +/// llvm.eh.resume. +BasicBlock *InvokeInliningInfo::getInnerUnwindDest() { + if (InnerUnwindDest) return InnerUnwindDest; + + // Find and hoist the llvm.eh.exception and llvm.eh.selector calls + // in the outer landing pad to immediately following the phis. + EHSelectorInst *selector = getOuterSelector(); + if (!selector) return 0; + + // The call to llvm.eh.exception *must* be in the landing pad. + Instruction *exn = cast<Instruction>(selector->getArgOperand(0)); + assert(exn->getParent() == OuterUnwindDest); + + // TODO: recognize when we've already done this, so that we don't + // get a linear number of these when inlining calls into lots of + // invokes with the same landing pad. + + // Do the hoisting. 
+ Instruction *splitPoint = exn->getParent()->getFirstNonPHI(); + assert(splitPoint != selector && "selector-on-exception dominance broken!"); + if (splitPoint == exn) { + selector->removeFromParent(); + selector->insertAfter(exn); + splitPoint = selector->getNextNode(); + } else { + exn->moveBefore(splitPoint); + selector->moveBefore(splitPoint); + } + + // Split the landing pad. + InnerUnwindDest = OuterUnwindDest->splitBasicBlock(splitPoint, + OuterUnwindDest->getName() + ".body"); + + // The number of incoming edges we expect to the inner landing pad. + const unsigned phiCapacity = 2; + + // Create corresponding new phis for all the phis in the outer landing pad. + BasicBlock::iterator insertPoint = InnerUnwindDest->begin(); + BasicBlock::iterator I = OuterUnwindDest->begin(); + for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) { + PHINode *outerPhi = cast<PHINode>(I); + PHINode *innerPhi = PHINode::Create(outerPhi->getType(), phiCapacity, + outerPhi->getName() + ".lpad-body", + insertPoint); + outerPhi->replaceAllUsesWith(innerPhi); + innerPhi->addIncoming(outerPhi, OuterUnwindDest); + } + + // Create a phi for the exception value... + InnerExceptionPHI = PHINode::Create(exn->getType(), phiCapacity, + "exn.lpad-body", insertPoint); + exn->replaceAllUsesWith(InnerExceptionPHI); + selector->setArgOperand(0, exn); // restore this use + InnerExceptionPHI->addIncoming(exn, OuterUnwindDest); + + // ...and the selector. + InnerSelectorPHI = PHINode::Create(selector->getType(), phiCapacity, + "selector.lpad-body", insertPoint); + selector->replaceAllUsesWith(InnerSelectorPHI); + InnerSelectorPHI->addIncoming(selector, OuterUnwindDest); + + // All done. + return InnerUnwindDest; +} + +/// [LIBUNWIND] Try to forward the given call, which logically occurs +/// at the end of the given block, as a branch to the inner unwind +/// block. Returns true if the call was forwarded. +bool InvokeInliningInfo::forwardEHResume(CallInst *call, BasicBlock *src) { + // First, check whether this is a call to the intrinsic. + Function *fn = dyn_cast<Function>(call->getCalledValue()); + if (!fn || fn->getName() != "llvm.eh.resume") + return false; + + // At this point, we need to return true on all paths, because + // otherwise we'll construct an invoke of the intrinsic, which is + // not well-formed. + + // Try to find or make an inner unwind dest, which will fail if we + // can't find a selector call for the outer unwind dest. + BasicBlock *dest = getInnerUnwindDest(); + bool hasSelector = (dest != 0); + + // If we failed, just use the outer unwind dest, dropping the + // exception and selector on the floor. + if (!hasSelector) + dest = OuterUnwindDest; + + // Make a branch. + BranchInst::Create(dest, src); + + // Update the phis in the destination. They were inserted in an + // order which makes this work. + addIncomingPHIValuesForInto(src, dest); + + if (hasSelector) { + InnerExceptionPHI->addIncoming(call->getArgOperand(0), src); + InnerSelectorPHI->addIncoming(call->getArgOperand(1), src); + } + + return true; +} + +/// Get or create a target for the branch from ResumeInsts. +BasicBlock *InvokeInliningInfo::getInnerUnwindDestNewEH() { + // FIXME: New EH - rename this function when new EH is turned on. + if (InnerResumeDest) return InnerResumeDest; + + // Split the landing pad. 
+  BasicBlock::iterator SplitPoint = CallerLPad; ++SplitPoint;
+  InnerResumeDest =
+    OuterResumeDest->splitBasicBlock(SplitPoint,
+                                     OuterResumeDest->getName() + ".body");
+
+  // The number of incoming edges we expect to the inner landing pad.
+  const unsigned PHICapacity = 2;
+
+  // Create corresponding new PHIs for all the PHIs in the outer landing pad.
+  BasicBlock::iterator InsertPoint = InnerResumeDest->begin();
+  BasicBlock::iterator I = OuterResumeDest->begin();
+  for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
+    PHINode *OuterPHI = cast<PHINode>(I);
+    PHINode *InnerPHI = PHINode::Create(OuterPHI->getType(), PHICapacity,
+                                        OuterPHI->getName() + ".lpad-body",
+                                        InsertPoint);
+    OuterPHI->replaceAllUsesWith(InnerPHI);
+    InnerPHI->addIncoming(OuterPHI, OuterResumeDest);
+  }
+
+  // Create a PHI for the exception values.
+  InnerEHValuesPHI = PHINode::Create(CallerLPad->getType(), PHICapacity,
+                                     "eh.lpad-body", InsertPoint);
+  CallerLPad->replaceAllUsesWith(InnerEHValuesPHI);
+  InnerEHValuesPHI->addIncoming(CallerLPad, OuterResumeDest);
+
+  // All done.
+  return InnerResumeDest;
+}
+
+/// forwardResume - Forward the 'resume' instruction to the caller's landing
+/// pad block. When the landing pad block has only one predecessor, this is a
+/// simple branch. When there is more than one predecessor, we need to split
+/// the landing pad block after the landingpad instruction and jump to there.
+void InvokeInliningInfo::forwardResume(ResumeInst *RI) {
+  BasicBlock *Dest = getInnerUnwindDestNewEH();
+  BasicBlock *Src = RI->getParent();
+
+  BranchInst::Create(Dest, Src);
+
+  // Update the PHIs in the destination. They were inserted in an order which
+  // makes this work.
+  addIncomingPHIValuesForInto(Src, Dest);
+
+  InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src);
+  RI->eraseFromParent();
+}
+
+/// [LIBUNWIND] Check whether this selector is "only cleanups":
+///   call i32 @llvm.eh.selector(blah, blah, i32 0)
+static bool isCleanupOnlySelector(EHSelectorInst *selector) {
+  if (selector->getNumArgOperands() != 3) return false;
+  ConstantInt *val = dyn_cast<ConstantInt>(selector->getArgOperand(2));
+  return (val && val->isZero());
+}
+
+/// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into
+/// an invoke, we have to turn all of the calls that can throw into
+/// invokes. This function analyzes BB to see if there are any calls, and if
+/// so, it rewrites them to be invokes that jump to InvokeDest and fills in the
+/// PHI nodes in that block with the values specified in InvokeDestPHIValues.
+///
+/// Returns true to indicate that the next block should be skipped.
+static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
+                                                   InvokeInliningInfo &Invoke) {
+  LandingPadInst *LPI = Invoke.getLandingPadInst();
+
+  for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
+    Instruction *I = BBI++;
+
+    if (LPI) // FIXME: New EH - This won't be NULL in the new EH.
+      if (LandingPadInst *L = dyn_cast<LandingPadInst>(I)) {
+        unsigned NumClauses = LPI->getNumClauses();
+        L->reserveClauses(NumClauses);
+        for (unsigned i = 0; i != NumClauses; ++i)
+          L->addClause(LPI->getClause(i));
+      }
+
+    // We only need to check for function calls: inlined invoke
+    // instructions require no special handling.
+    CallInst *CI = dyn_cast<CallInst>(I);
+    if (CI == 0) continue;
+
+    // LIBUNWIND: merge selector instructions.
+ if (EHSelectorInst *Inner = dyn_cast<EHSelectorInst>(CI)) { + EHSelectorInst *Outer = Invoke.getOuterSelector(); + if (!Outer) continue; + + bool innerIsOnlyCleanup = isCleanupOnlySelector(Inner); + bool outerIsOnlyCleanup = isCleanupOnlySelector(Outer); + + // If both selectors contain only cleanups, we don't need to do + // anything. TODO: this is really just a very specific instance + // of a much more general optimization. + if (innerIsOnlyCleanup && outerIsOnlyCleanup) continue; + + // Otherwise, we just append the outer selector to the inner selector. + SmallVector<Value*, 16> NewSelector; + for (unsigned i = 0, e = Inner->getNumArgOperands(); i != e; ++i) + NewSelector.push_back(Inner->getArgOperand(i)); + for (unsigned i = 2, e = Outer->getNumArgOperands(); i != e; ++i) + NewSelector.push_back(Outer->getArgOperand(i)); + + CallInst *NewInner = + IRBuilder<>(Inner).CreateCall(Inner->getCalledValue(), NewSelector); + // No need to copy attributes, calling convention, etc. + NewInner->takeName(Inner); + Inner->replaceAllUsesWith(NewInner); + Inner->eraseFromParent(); + continue; + } + + // If this call cannot unwind, don't convert it to an invoke. + if (CI->doesNotThrow()) + continue; + + // Convert this function call into an invoke instruction. + // First, split the basic block. + BasicBlock *Split = BB->splitBasicBlock(CI, CI->getName()+".noexc"); + + // Delete the unconditional branch inserted by splitBasicBlock + BB->getInstList().pop_back(); + + // LIBUNWIND: If this is a call to @llvm.eh.resume, just branch + // directly to the new landing pad. + if (Invoke.forwardEHResume(CI, BB)) { + // TODO: 'Split' is now unreachable; clean it up. + + // We want to leave the original call intact so that the call + // graph and other structures won't get misled. We also have to + // avoid processing the next block, or we'll iterate here forever. + return true; + } + + // Otherwise, create the new invoke instruction. + ImmutableCallSite CS(CI); + SmallVector<Value*, 8> InvokeArgs(CS.arg_begin(), CS.arg_end()); + InvokeInst *II = + InvokeInst::Create(CI->getCalledValue(), Split, + Invoke.getOuterUnwindDest(), + InvokeArgs, CI->getName(), BB); + II->setCallingConv(CI->getCallingConv()); + II->setAttributes(CI->getAttributes()); + + // Make sure that anything using the call now uses the invoke! This also + // updates the CallGraph if present, because it uses a WeakVH. + CI->replaceAllUsesWith(II); + + Split->getInstList().pop_front(); // Delete the original call + + // Update any PHI nodes in the exceptional block to indicate that + // there is now a new entry in them. + Invoke.addIncomingPHIValuesFor(BB); + return false; + } + + return false; +} + + +/// HandleInlinedInvoke - If we inlined an invoke site, we need to convert calls +/// in the body of the inlined function into invokes and turn unwind +/// instructions into branches to the invoke unwind dest. +/// +/// II is the invoke instruction being inlined. FirstNewBlock is the first +/// block of the inlined code (the last block is the end of the function), +/// and InlineCodeInfo is information about the code that got inlined. +static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, + ClonedCodeInfo &InlinedCodeInfo) { + BasicBlock *InvokeDest = II->getUnwindDest(); + + Function *Caller = FirstNewBlock->getParent(); + + // The inlined code is currently at the end of the function, scan from the + // start of the inlined code to its end, checking for stuff we need to + // rewrite. 
If the code doesn't have calls or unwinds, we know there is + // nothing to rewrite. + if (!InlinedCodeInfo.ContainsCalls && !InlinedCodeInfo.ContainsUnwinds) { + // Now that everything is happy, we have one final detail. The PHI nodes in + // the exception destination block still have entries due to the original + // invoke instruction. Eliminate these entries (which might even delete the + // PHI node) now. + InvokeDest->removePredecessor(II->getParent()); + return; + } + + InvokeInliningInfo Invoke(II); + + for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB){ + if (InlinedCodeInfo.ContainsCalls) + if (HandleCallsInBlockInlinedThroughInvoke(BB, Invoke)) { + // Honor a request to skip the next block. We don't need to + // consider UnwindInsts in this case either. + ++BB; + continue; + } + + if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) { + // An UnwindInst requires special handling when it gets inlined into an + // invoke site. Once this happens, we know that the unwind would cause + // a control transfer to the invoke exception destination, so we can + // transform it into a direct branch to the exception destination. + BranchInst::Create(InvokeDest, UI); + + // Delete the unwind instruction! + UI->eraseFromParent(); + + // Update any PHI nodes in the exceptional block to indicate that + // there is now a new entry in them. + Invoke.addIncomingPHIValuesFor(BB); + } + + if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) { + Invoke.forwardResume(RI); + } + } + + // Now that everything is happy, we have one final detail. The PHI nodes in + // the exception destination block still have entries due to the original + // invoke instruction. Eliminate these entries (which might even delete the + // PHI node) now. + InvokeDest->removePredecessor(II->getParent()); +} + +/// UpdateCallGraphAfterInlining - Once we have cloned code over from a callee +/// into the caller, update the specified callgraph to reflect the changes we +/// made. Note that it's possible that not all code was copied over, so only +/// some edges of the callgraph may remain. +static void UpdateCallGraphAfterInlining(CallSite CS, + Function::iterator FirstNewBlock, + ValueToValueMapTy &VMap, + InlineFunctionInfo &IFI) { + CallGraph &CG = *IFI.CG; + const Function *Caller = CS.getInstruction()->getParent()->getParent(); + const Function *Callee = CS.getCalledFunction(); + CallGraphNode *CalleeNode = CG[Callee]; + CallGraphNode *CallerNode = CG[Caller]; + + // Since we inlined some uninlined call sites in the callee into the caller, + // add edges from the caller to all of the callees of the callee. + CallGraphNode::iterator I = CalleeNode->begin(), E = CalleeNode->end(); + + // Consider the case where CalleeNode == CallerNode. + CallGraphNode::CalledFunctionsVector CallCache; + if (CalleeNode == CallerNode) { + CallCache.assign(I, E); + I = CallCache.begin(); + E = CallCache.end(); + } + + for (; I != E; ++I) { + const Value *OrigCall = I->first; + + ValueToValueMapTy::iterator VMI = VMap.find(OrigCall); + // Only copy the edge if the call was inlined! + if (VMI == VMap.end() || VMI->second == 0) + continue; + + // If the call was inlined, but then constant folded, there is no edge to + // add. Check for this case. + Instruction *NewCall = dyn_cast<Instruction>(VMI->second); + if (NewCall == 0) continue; + + // Remember that this call site got inlined for the client of + // InlineFunction. 
+ IFI.InlinedCalls.push_back(NewCall); + + // It's possible that inlining the callsite will cause it to go from an + // indirect to a direct call by resolving a function pointer. If this + // happens, set the callee of the new call site to a more precise + // destination. This can also happen if the call graph node of the caller + // was just unnecessarily imprecise. + if (I->second->getFunction() == 0) + if (Function *F = CallSite(NewCall).getCalledFunction()) { + // Indirect call site resolved to direct call. + CallerNode->addCalledFunction(CallSite(NewCall), CG[F]); + + continue; + } + + CallerNode->addCalledFunction(CallSite(NewCall), I->second); + } + + // Update the call graph by deleting the edge from Callee to Caller. We must + // do this after the loop above in case Caller and Callee are the same. + CallerNode->removeCallEdgeFor(CS); +} + +/// HandleByValArgument - When inlining a call site that has a byval argument, +/// we have to make the implicit memcpy explicit by adding it. +static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, + const Function *CalledFunc, + InlineFunctionInfo &IFI, + unsigned ByValAlignment) { + Type *AggTy = cast<PointerType>(Arg->getType())->getElementType(); + + // If the called function is readonly, then it could not mutate the caller's + // copy of the byval'd memory. In this case, it is safe to elide the copy and + // temporary. + if (CalledFunc->onlyReadsMemory()) { + // If the byval argument has a specified alignment that is greater than the + // passed in pointer, then we either have to round up the input pointer or + // give up on this transformation. + if (ByValAlignment <= 1) // 0 = unspecified, 1 = no particular alignment. + return Arg; + + // If the pointer is already known to be sufficiently aligned, or if we can + // round it up to a larger alignment, then we don't need a temporary. + if (getOrEnforceKnownAlignment(Arg, ByValAlignment, + IFI.TD) >= ByValAlignment) + return Arg; + + // Otherwise, we have to make a memcpy to get a safe alignment. This is bad + // for code quality, but rarely happens and is required for correctness. + } + + LLVMContext &Context = Arg->getContext(); + + Type *VoidPtrTy = Type::getInt8PtrTy(Context); + + // Create the alloca. If we have TargetData, use nice alignment. + unsigned Align = 1; + if (IFI.TD) + Align = IFI.TD->getPrefTypeAlignment(AggTy); + + // If the byval had an alignment specified, we *must* use at least that + // alignment, as it is required by the byval argument (and uses of the + // pointer inside the callee). + Align = std::max(Align, ByValAlignment); + + Function *Caller = TheCall->getParent()->getParent(); + + Value *NewAlloca = new AllocaInst(AggTy, 0, Align, Arg->getName(), + &*Caller->begin()->begin()); + // Emit a memcpy. + Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)}; + Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(), + Intrinsic::memcpy, + Tys); + Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall); + Value *SrcCast = new BitCastInst(Arg, VoidPtrTy, "tmp", TheCall); + + Value *Size; + if (IFI.TD == 0) + Size = ConstantExpr::getSizeOf(AggTy); + else + Size = ConstantInt::get(Type::getInt64Ty(Context), + IFI.TD->getTypeStoreSize(AggTy)); + + // Always generate a memcpy of alignment 1 here because we don't know + // the alignment of the src pointer. Other optimizations can infer + // better alignment. 
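As background for the copy being materialized in HandleByValArgument: byval guarantees the callee sees a private copy of the pointed-to memory, which is the contract the explicit memcpy preserves after inlining removes the call boundary. A small standalone analogue of that contract, in illustrative C++ rather than the IR actually produced:

    #include <cstdio>
    #include <cstring>

    struct Agg { char buf[16]; };

    // Pass-by-value models 'byval': the callee mutates only its own copy.
    static void callee_byval(Agg a) {
      std::strcpy(a.buf, "changed");
    }

    int main() {
      Agg x;
      std::strcpy(x.buf, "original");
      callee_byval(x);             // implicit copy, like the memcpy made
                                   // explicit when the call is inlined
      std::printf("%s\n", x.buf);  // still "original"
      return 0;
    }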
+ Value *CallArgs[] = {
+ DestCast, SrcCast, Size,
+ ConstantInt::get(Type::getInt32Ty(Context), 1),
+ ConstantInt::getFalse(Context) // isVolatile
+ };
+ IRBuilder<>(TheCall).CreateCall(MemCpyFn, CallArgs);
+
+ // Uses of the argument in the function should use our new alloca
+ // instead.
+ return NewAlloca;
+}
+
+// isUsedByLifetimeMarker - Check whether this Value is used by a lifetime
+// intrinsic.
+static bool isUsedByLifetimeMarker(Value *V) {
+ for (Value::use_iterator UI = V->use_begin(), UE = V->use_end(); UI != UE;
+ ++UI) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(*UI)) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+// hasLifetimeMarkers - Check whether the given alloca already has
+// lifetime.start or lifetime.end intrinsics.
+static bool hasLifetimeMarkers(AllocaInst *AI) {
+ Type *Int8PtrTy = Type::getInt8PtrTy(AI->getType()->getContext());
+ if (AI->getType() == Int8PtrTy)
+ return isUsedByLifetimeMarker(AI);
+
+ // Do a scan to find all the casts to i8*.
+ for (Value::use_iterator I = AI->use_begin(), E = AI->use_end(); I != E;
+ ++I) {
+ if (I->getType() != Int8PtrTy) continue;
+ if (I->stripPointerCasts() != AI) continue;
+ if (isUsedByLifetimeMarker(*I))
+ return true;
+ }
+ return false;
+}
+
+/// updateInlinedAtInfo - Helper function used by fixupLineNumbers to recursively
+/// update InlinedAtEntry of a DebugLoc.
+static DebugLoc updateInlinedAtInfo(const DebugLoc &DL,
+ const DebugLoc &InlinedAtDL,
+ LLVMContext &Ctx) {
+ if (MDNode *IA = DL.getInlinedAt(Ctx)) {
+ DebugLoc NewInlinedAtDL
+ = updateInlinedAtInfo(DebugLoc::getFromDILocation(IA), InlinedAtDL, Ctx);
+ return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx),
+ NewInlinedAtDL.getAsMDNode(Ctx));
+ }
+
+ return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx),
+ InlinedAtDL.getAsMDNode(Ctx));
+}
+
+
+/// fixupLineNumbers - Update inlined instructions' line numbers to encode the
+/// location where these instructions are inlined.
+static void fixupLineNumbers(Function *Fn, Function::iterator FI,
+ Instruction *TheCall) {
+ DebugLoc TheCallDL = TheCall->getDebugLoc();
+ if (TheCallDL.isUnknown())
+ return;
+
+ for (; FI != Fn->end(); ++FI) {
+ for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
+ BI != BE; ++BI) {
+ DebugLoc DL = BI->getDebugLoc();
+ if (!DL.isUnknown()) {
+ BI->setDebugLoc(updateInlinedAtInfo(DL, TheCallDL, BI->getContext()));
+ if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(BI)) {
+ LLVMContext &Ctx = BI->getContext();
+ MDNode *InlinedAt = BI->getDebugLoc().getInlinedAt(Ctx);
+ DVI->setOperand(2, createInlinedVariable(DVI->getVariable(),
+ InlinedAt, Ctx));
+ }
+ }
+ }
+ }
+}
+
+// InlineFunction - This function inlines the called function into the basic
+// block of the caller. This returns false if it is not possible to inline this
+// call. The program is still in a well-defined state if this occurs though.
+//
+// Note that this only does one level of inlining. For example, if the
+// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
+// exists in the instruction stream. Similarly this will inline a recursive
+// function by one level.
+//
+bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
+ Instruction *TheCall = CS.getInstruction();
+ LLVMContext &Context = TheCall->getContext();
+ assert(TheCall->getParent() && TheCall->getParent()->getParent() &&
+ "Instruction not in function!");
+
+ // If IFI has any state in it, zap it before we fill it in.
+ IFI.reset();
+
+ const Function *CalledFunc = CS.getCalledFunction();
+ if (CalledFunc == 0 || // Can't inline external function or indirect
+ CalledFunc->isDeclaration() || // call, or call to a vararg function!
+ CalledFunc->getFunctionType()->isVarArg()) return false;
+
+ // If the call to the callee is not a tail call, we must clear the 'tail'
+ // flags on any calls that we inline.
+ bool MustClearTailCallFlags =
+ !(isa<CallInst>(TheCall) && cast<CallInst>(TheCall)->isTailCall());
+
+ // If the call to the callee cannot throw, set the 'nounwind' flag on any
+ // calls that we inline.
+ bool MarkNoUnwind = CS.doesNotThrow();
+
+ BasicBlock *OrigBB = TheCall->getParent();
+ Function *Caller = OrigBB->getParent();
+
+ // GC poses two hazards to inlining, which only occur when the callee has GC:
+ // 1. If the caller has no GC, then the callee's GC must be propagated to the
+ // caller.
+ // 2. If the caller has a differing GC, it is invalid to inline.
+ if (CalledFunc->hasGC()) {
+ if (!Caller->hasGC())
+ Caller->setGC(CalledFunc->getGC());
+ else if (CalledFunc->getGC() != Caller->getGC())
+ return false;
+ }
+
+ // Find the personality function used by the landing pads of the caller. If it
+ // exists, then check to see that it matches the personality function used in
+ // the callee.
+ for (Function::const_iterator
+ I = Caller->begin(), E = Caller->end(); I != E; ++I)
+ if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
+ const BasicBlock *BB = II->getUnwindDest();
+ // FIXME: This 'isa' check should go away once the new EH system is
+ // in place.
+ if (!isa<LandingPadInst>(BB->getFirstNonPHI()))
+ continue;
+ const LandingPadInst *LP = cast<LandingPadInst>(BB->getFirstNonPHI());
+ const Value *CallerPersFn = LP->getPersonalityFn();
+
+ // If the personality functions match, then we can perform the
+ // inlining. Otherwise, we can't inline.
+ // TODO: This isn't 100% true. Some personality functions are proper
+ // supersets of others and can be used in place of the other.
+ for (Function::const_iterator
+ I = CalledFunc->begin(), E = CalledFunc->end(); I != E; ++I)
+ if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
+ const BasicBlock *BB = II->getUnwindDest();
+ // FIXME: This 'if/dyn_cast' here should become a normal 'cast' once
+ // the new EH system is in place.
+ if (const LandingPadInst *LP =
+ dyn_cast<LandingPadInst>(BB->getFirstNonPHI()))
+ if (CallerPersFn != LP->getPersonalityFn())
+ return false;
+ break;
+ }
+
+ break;
+ }
+
+ // Get an iterator to the last basic block in the function, which will have
+ // the new function inlined after it.
+ //
+ Function::iterator LastBlock = &Caller->back();
+
+ // Make sure to capture all of the return instructions from the cloned
+ // function.
+ SmallVector<ReturnInst*, 8> Returns;
+ ClonedCodeInfo InlinedFunctionInfo;
+ Function::iterator FirstNewBlock;
+
+ { // Scope to destroy VMap after cloning.
+ ValueToValueMapTy VMap;
+
+ assert(CalledFunc->arg_size() == CS.arg_size() &&
+ "No varargs calls can be inlined!");
+
+ // Calculate the vector of arguments to pass into the function cloner, which
+ // matches up the formal to the actual argument values.
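+ // For illustration: for a callee "i32 @f(i32 %x)" called as
+ // "call i32 @f(i32 7)", the loop below ends up with VMap[%x] = i32 7, so
+ // the cloner rewrites uses of %x to the constant (hypothetical names).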
+ CallSite::arg_iterator AI = CS.arg_begin();
+ unsigned ArgNo = 0;
+ for (Function::const_arg_iterator I = CalledFunc->arg_begin(),
+ E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) {
+ Value *ActualArg = *AI;
+
+ // When byval arguments are actually inlined, we need to make the copy
+ // implied by them explicit. However, we don't do this if the callee is
+ // readonly or readnone, because the copy would be unneeded: the callee
+ // doesn't modify the struct.
+ if (CalledFunc->paramHasAttr(ArgNo+1, Attribute::ByVal)) {
+ ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI,
+ CalledFunc->getParamAlignment(ArgNo+1));
+
+ // Calls that we inline may use the new alloca, so we need to clear
+ // their 'tail' flags if HandleByValArgument introduced a new alloca and
+ // the callee has calls.
+ MustClearTailCallFlags |= ActualArg != *AI;
+ }
+
+ VMap[I] = ActualArg;
+ }
+
+ // We want the inliner to prune the code as it copies. We would LOVE to
+ // have no dead or constant instructions leftover after inlining occurs
+ // (which can happen, e.g., because an argument was constant), but we'll be
+ // happy with whatever the cloner can do.
+ CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,
+ /*ModuleLevelChanges=*/false, Returns, ".i",
+ &InlinedFunctionInfo, IFI.TD, TheCall);
+
+ // Remember the first block that is newly cloned over.
+ FirstNewBlock = LastBlock; ++FirstNewBlock;
+
+ // Update the callgraph if requested.
+ if (IFI.CG)
+ UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI);
+
+ // Update inlined instructions' line number information.
+ fixupLineNumbers(Caller, FirstNewBlock, TheCall);
+ }
+
+ // If there are any alloca instructions in the block that used to be the entry
+ // block for the callee, move them to the entry block of the caller. First
+ // calculate which instruction they should be inserted before. We insert the
+ // instructions at the end of the current alloca list.
+ //
+ {
+ BasicBlock::iterator InsertPoint = Caller->begin()->begin();
+ for (BasicBlock::iterator I = FirstNewBlock->begin(),
+ E = FirstNewBlock->end(); I != E; ) {
+ AllocaInst *AI = dyn_cast<AllocaInst>(I++);
+ if (AI == 0) continue;
+
+ // If the alloca is now dead, remove it. This often occurs due to code
+ // specialization.
+ if (AI->use_empty()) {
+ AI->eraseFromParent();
+ continue;
+ }
+
+ if (!isa<Constant>(AI->getArraySize()))
+ continue;
+
+ // Keep track of the static allocas that we inline into the caller.
+ IFI.StaticAllocas.push_back(AI);
+
+ // Scan for the block of allocas that we can move over, and move them
+ // all at once.
+ while (isa<AllocaInst>(I) &&
+ isa<Constant>(cast<AllocaInst>(I)->getArraySize())) {
+ IFI.StaticAllocas.push_back(cast<AllocaInst>(I));
+ ++I;
+ }
+
+ // Transfer all of the allocas over in a block. Using splice means
+ // that the instructions aren't removed from the symbol table, then
+ // reinserted.
+ Caller->getEntryBlock().getInstList().splice(InsertPoint,
+ FirstNewBlock->getInstList(),
+ AI, I);
+ }
+ }
+
+ // Leave lifetime markers for the static allocas, scoping them to the
+ // function we just inlined.
+ if (!IFI.StaticAllocas.empty()) {
+ IRBuilder<> builder(FirstNewBlock->begin());
+ for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) {
+ AllocaInst *AI = IFI.StaticAllocas[ai];
+
+ // If the alloca is already scoped to something smaller than the whole
+ // function then there's no need to add redundant, less accurate markers.
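+ // For illustration, the markers added below bracket the alloca roughly
+ // like this (hypothetical IR; the size operand is -1 when it is unknown):
+ //   call void @llvm.lifetime.start(i64 -1, i8* %buf)
+ //   ... inlined body using %buf ...
+ //   call void @llvm.lifetime.end(i64 -1, i8* %buf)   ; before each return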
+ if (hasLifetimeMarkers(AI))
+ continue;
+
+ builder.CreateLifetimeStart(AI);
+ for (unsigned ri = 0, re = Returns.size(); ri != re; ++ri) {
+ IRBuilder<> builder(Returns[ri]);
+ builder.CreateLifetimeEnd(AI);
+ }
+ }
+ }
+
+ // If the inlined code contained dynamic alloca instructions, wrap the inlined
+ // code with llvm.stacksave/llvm.stackrestore intrinsics.
+ if (InlinedFunctionInfo.ContainsDynamicAllocas) {
+ Module *M = Caller->getParent();
+ // Get the two intrinsics we care about.
+ Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave);
+ Function *StackRestore = Intrinsic::getDeclaration(M, Intrinsic::stackrestore);
+
+ // Insert the llvm.stacksave.
+ CallInst *SavedPtr = IRBuilder<>(FirstNewBlock, FirstNewBlock->begin())
+ .CreateCall(StackSave, "savedstack");
+
+ // Insert a call to llvm.stackrestore before any return instructions in the
+ // inlined function.
+ for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
+ IRBuilder<>(Returns[i]).CreateCall(StackRestore, SavedPtr);
+ }
+
+ // Count the number of StackRestore calls we insert.
+ unsigned NumStackRestores = Returns.size();
+
+ // If we are inlining an invoke instruction, insert restores before each
+ // unwind. These unwinds will be rewritten into branches later.
+ if (InlinedFunctionInfo.ContainsUnwinds && isa<InvokeInst>(TheCall)) {
+ for (Function::iterator BB = FirstNewBlock, E = Caller->end();
+ BB != E; ++BB)
+ if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
+ IRBuilder<>(UI).CreateCall(StackRestore, SavedPtr);
+ ++NumStackRestores;
+ }
+ }
+ }
+
+ // If we are inlining a tail call instruction through a call site that isn't
+ // marked 'tail', we must remove the tail marker for any calls in the inlined
+ // code. Also, calls inlined through a 'nounwind' call site should be marked
+ // 'nounwind'.
+ if (InlinedFunctionInfo.ContainsCalls &&
+ (MustClearTailCallFlags || MarkNoUnwind)) {
+ for (Function::iterator BB = FirstNewBlock, E = Caller->end();
+ BB != E; ++BB)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (MustClearTailCallFlags)
+ CI->setTailCall(false);
+ if (MarkNoUnwind)
+ CI->setDoesNotThrow();
+ }
+ }
+
+ // If we are inlining through a 'nounwind' call site then any inlined 'unwind'
+ // instructions are unreachable.
+ if (InlinedFunctionInfo.ContainsUnwinds && MarkNoUnwind)
+ for (Function::iterator BB = FirstNewBlock, E = Caller->end();
+ BB != E; ++BB) {
+ TerminatorInst *Term = BB->getTerminator();
+ if (isa<UnwindInst>(Term)) {
+ new UnreachableInst(Context, Term);
+ BB->getInstList().erase(Term);
+ }
+ }
+
+ // If we are inlining for an invoke instruction, we must make sure to rewrite
+ // any inlined 'unwind' instructions into branches to the invoke exception
+ // destination, and call instructions into invoke instructions.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
+ HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo);
+
+ // If we cloned in _exactly one_ basic block, and if that block ends in a
+ // return instruction, we splice the body of the inlined callee directly into
+ // the calling basic block.
+ if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) {
+ // Move all of the instructions right before the call.
+ OrigBB->getInstList().splice(TheCall, FirstNewBlock->getInstList(),
+ FirstNewBlock->begin(), FirstNewBlock->end());
+ // Remove the cloned basic block.
+ Caller->getBasicBlockList().pop_back();
+
+ // If the call site was an invoke instruction, add a branch to the normal
+ // destination.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
+ BranchInst::Create(II->getNormalDest(), TheCall);
+
+ // If the return instruction returned a value, replace uses of the call with
+ // uses of the returned value.
+ if (!TheCall->use_empty()) {
+ ReturnInst *R = Returns[0];
+ if (TheCall == R->getReturnValue())
+ TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
+ else
+ TheCall->replaceAllUsesWith(R->getReturnValue());
+ }
+ // Since we are now done with the Call/Invoke, we can delete it.
+ TheCall->eraseFromParent();
+
+ // Since we are now done with the return instruction, delete it also.
+ Returns[0]->eraseFromParent();
+
+ // We are now done with the inlining.
+ return true;
+ }
+
+ // Otherwise, we have the normal case of more than one block to inline or
+ // multiple return sites.
+
+ // We want to clone the entire callee function into the hole between the
+ // "starter" and "ender" blocks. How we accomplish this depends on whether
+ // this is an invoke instruction or a call instruction.
+ BasicBlock *AfterCallBB;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
+
+ // Add an unconditional branch to make this look like the CallInst case...
+ BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall);
+
+ // Split the basic block. This guarantees that no PHI nodes will have to be
+ // updated due to new incoming edges, and makes the invoke case more
+ // symmetric to the call case.
+ AfterCallBB = OrigBB->splitBasicBlock(NewBr,
+ CalledFunc->getName()+".exit");
+
+ } else { // It's a call
+ // If this is a call instruction, we need to split the basic block that
+ // the call lives in.
+ //
+ AfterCallBB = OrigBB->splitBasicBlock(TheCall,
+ CalledFunc->getName()+".exit");
+ }
+
+ // Change the branch that used to go to AfterCallBB to branch to the first
+ // basic block of the inlined function.
+ //
+ TerminatorInst *Br = OrigBB->getTerminator();
+ assert(Br && Br->getOpcode() == Instruction::Br &&
+ "splitBasicBlock broken!");
+ Br->setOperand(0, FirstNewBlock);
+
+
+ // Now that the function is correct, make it a little bit nicer. In
+ // particular, move the basic blocks inserted from the end of the function
+ // into the space made by splitting the source basic block.
+ Caller->getBasicBlockList().splice(AfterCallBB, Caller->getBasicBlockList(),
+ FirstNewBlock, Caller->end());
+
+ // Handle all of the return instructions that we just cloned in, and eliminate
+ // any users of the original call/invoke instruction.
+ Type *RTy = CalledFunc->getReturnType();
+
+ PHINode *PHI = 0;
+ if (Returns.size() > 1) {
+ // The PHI node should go at the front of the new basic block to merge all
+ // possible incoming values.
+ if (!TheCall->use_empty()) {
+ PHI = PHINode::Create(RTy, Returns.size(), TheCall->getName(),
+ AfterCallBB->begin());
+ // Anything that used the result of the function call should now use the
+ // PHI node as its operand.
+ TheCall->replaceAllUsesWith(PHI);
+ }
+
+ // Loop over all of the return instructions adding entries to the PHI node
+ // as appropriate.
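+ // For illustration: with returns "ret i32 %a" (in %bb1) and "ret i32 %b"
+ // (in %bb2), the merge block gets "%r = phi i32 [ %a, %bb1 ], [ %b, %bb2 ]"
+ // and uses of the old call are rewritten to %r (hypothetical names).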
+ if (PHI) {
+ for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
+ ReturnInst *RI = Returns[i];
+ assert(RI->getReturnValue()->getType() == PHI->getType() &&
+ "Ret value not consistent in function!");
+ PHI->addIncoming(RI->getReturnValue(), RI->getParent());
+ }
+ }
+
+
+ // Add a branch to the merge points and remove return instructions.
+ for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
+ ReturnInst *RI = Returns[i];
+ BranchInst::Create(AfterCallBB, RI);
+ RI->eraseFromParent();
+ }
+ } else if (!Returns.empty()) {
+ // Otherwise, if there is exactly one return value, just replace anything
+ // using the return value of the call with the computed value.
+ if (!TheCall->use_empty()) {
+ if (TheCall == Returns[0]->getReturnValue())
+ TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
+ else
+ TheCall->replaceAllUsesWith(Returns[0]->getReturnValue());
+ }
+
+ // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
+ BasicBlock *ReturnBB = Returns[0]->getParent();
+ ReturnBB->replaceAllUsesWith(AfterCallBB);
+
+ // Splice the code from the return block into the block that it will return
+ // to, which contains the code that was after the call.
+ AfterCallBB->getInstList().splice(AfterCallBB->begin(),
+ ReturnBB->getInstList());
+
+ // Delete the return instruction and the now-empty ReturnBB.
+ Returns[0]->eraseFromParent();
+ ReturnBB->eraseFromParent();
+ } else if (!TheCall->use_empty()) {
+ // No returns, but something is using the return value of the call. Just
+ // nuke the result.
+ TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
+ }
+
+ // Since we are now done with the Call/Invoke, we can delete it.
+ TheCall->eraseFromParent();
+
+ // We should always be able to fold the entry block of the function into the
+ // single predecessor of the block...
+ assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!");
+ BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0);
+
+ // Splice the callee's entry block into the calling block, right before the
+ // unconditional branch.
+ CalleeEntry->replaceAllUsesWith(OrigBB); // Update PHI nodes
+ OrigBB->getInstList().splice(Br, CalleeEntry->getInstList());
+
+ // Remove the unconditional branch.
+ OrigBB->getInstList().erase(Br);
+
+ // Now we can remove the CalleeEntry block, which is now empty.
+ Caller->getBasicBlockList().erase(CalleeEntry);
+
+ // If we inserted a phi node, check to see if it has a single value (e.g. all
+ // the entries are the same or undef). If so, remove the PHI so it doesn't
+ // block other optimizations.
+ if (PHI)
+ if (Value *V = SimplifyInstruction(PHI, IFI.TD)) {
+ PHI->replaceAllUsesWith(V);
+ PHI->eraseFromParent();
+ }
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp b/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp
new file mode 100644
index 0000000..45c15de
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp
@@ -0,0 +1,64 @@
+//===- InstructionNamer.cpp - Give anonymous instructions names -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a little utility pass that gives instructions names; this is mostly
+// useful when diffing the effect of an optimization because deleting an
+// unnamed instruction can change all other instruction numbering, making the
+// diff very noisy.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Function.h"
+#include "llvm/Pass.h"
+#include "llvm/Type.h"
+using namespace llvm;
+
+namespace {
+ struct InstNamer : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ InstNamer() : FunctionPass(ID) {
+ initializeInstNamerPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &Info) const {
+ Info.setPreservesAll();
+ }
+
+ bool runOnFunction(Function &F) {
+ for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end();
+ AI != AE; ++AI)
+ if (!AI->hasName() && !AI->getType()->isVoidTy())
+ AI->setName("arg");
+
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (!BB->hasName())
+ BB->setName("bb");
+
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (!I->hasName() && !I->getType()->isVoidTy())
+ I->setName("tmp");
+ }
+ return true;
+ }
+ };
+
+ char InstNamer::ID = 0;
+}
+
+INITIALIZE_PASS(InstNamer, "instnamer",
+ "Assign names to anonymous instructions", false, false)
+char &llvm::InstructionNamerID = InstNamer::ID;
+//===----------------------------------------------------------------------===//
+//
+// InstructionNamer - Give any unnamed non-void instructions "tmp" names.
+//
+FunctionPass *llvm::createInstructionNamerPass() {
+ return new InstNamer();
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
new file mode 100644
index 0000000..b654111
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
@@ -0,0 +1,279 @@
+//===-- LCSSA.cpp - Convert loops into loop-closed SSA form ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass transforms loops by placing phi nodes at the end of the loops for
+// all values that are live across the loop boundary. For example, it turns
+// the code on the left into the code on the right:
+//
+// for (...) for (...)
+// if (c) if (c)
+// X1 = ... X1 = ...
+// else else
+// X2 = ... X2 = ...
+// X3 = phi(X1, X2) X3 = phi(X1, X2)
+// ... = X3 + 4 X4 = phi(X3)
+// ... = X4 + 4
+//
+// This is still valid LLVM; the extra phi nodes are purely redundant, and will
+// be trivially eliminated by InstCombine. The major benefit of this
+// transformation is that it makes many other loop optimizations, such as
+// LoopUnswitching, simpler.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "lcssa"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/Pass.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/PredIteratorCache.h"
+using namespace llvm;
+
+STATISTIC(NumLCSSA, "Number of loop live-out variables");
+
+namespace {
+ struct LCSSA : public LoopPass {
+ static char ID; // Pass identification, replacement for typeid
+ LCSSA() : LoopPass(ID) {
+ initializeLCSSAPass(*PassRegistry::getPassRegistry());
+ }
+
+ // Cached analysis information for the current function.
+ DominatorTree *DT;
+ std::vector<BasicBlock*> LoopBlocks;
+ PredIteratorCache PredCache;
+ Loop *L;
+
+ virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+ /// This transformation requires natural loop information & requires that
+ /// loop preheaders be inserted into the CFG. It maintains both of these,
+ /// as well as the CFG. It also requires dominator information.
+ ///
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<LoopInfo>();
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addPreserved<ScalarEvolution>();
+ }
+ private:
+ bool ProcessInstruction(Instruction *Inst,
+ const SmallVectorImpl<BasicBlock*> &ExitBlocks);
+
+ /// verifyAnalysis() - Verify loop nest.
+ virtual void verifyAnalysis() const {
+ // Check the special guarantees that LCSSA makes.
+ assert(L->isLCSSAForm(*DT) && "LCSSA form not preserved!");
+ }
+
+ /// inLoop - Returns true if the given block is within the current loop.
+ bool inLoop(BasicBlock *B) const {
+ return std::binary_search(LoopBlocks.begin(), LoopBlocks.end(), B);
+ }
+ };
+}
+
+char LCSSA::ID = 0;
+INITIALIZE_PASS_BEGIN(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_END(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false)
+
+Pass *llvm::createLCSSAPass() { return new LCSSA(); }
+char &llvm::LCSSAID = LCSSA::ID;
+
+
+/// BlockDominatesAnExit - Return true if the specified block dominates at least
+/// one of the blocks in the specified list.
+static bool BlockDominatesAnExit(BasicBlock *BB,
+ const SmallVectorImpl<BasicBlock*> &ExitBlocks,
+ DominatorTree *DT) {
+ DomTreeNode *DomNode = DT->getNode(BB);
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+ if (DT->dominates(DomNode, DT->getNode(ExitBlocks[i])))
+ return true;
+
+ return false;
+}
+
+
+/// runOnLoop - Process one loop; the loop pass manager applies this to every
+/// loop in the function, inner-most out.
+bool LCSSA::runOnLoop(Loop *TheLoop, LPPassManager &LPM) {
+ L = TheLoop;
+
+ DT = &getAnalysis<DominatorTree>();
+
+ // Get the set of exit blocks.
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getExitBlocks(ExitBlocks);
+
+ if (ExitBlocks.empty())
+ return false;
+
+ // Speed up queries by creating a sorted vector of blocks.
+ LoopBlocks.clear();
+ LoopBlocks.insert(LoopBlocks.end(), L->block_begin(), L->block_end());
+ array_pod_sort(LoopBlocks.begin(), LoopBlocks.end());
+
+ // Look at all the instructions in the loop, checking to see if they have uses
+ // outside the loop. If so, rewrite those uses.
+ bool MadeChange = false; + + for (Loop::block_iterator BBI = L->block_begin(), E = L->block_end(); + BBI != E; ++BBI) { + BasicBlock *BB = *BBI; + + // For large loops, avoid use-scanning by using dominance information: In + // particular, if a block does not dominate any of the loop exits, then none + // of the values defined in the block could be used outside the loop. + if (!BlockDominatesAnExit(BB, ExitBlocks, DT)) + continue; + + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); + I != E; ++I) { + // Reject two common cases fast: instructions with no uses (like stores) + // and instructions with one use that is in the same block as this. + if (I->use_empty() || + (I->hasOneUse() && I->use_back()->getParent() == BB && + !isa<PHINode>(I->use_back()))) + continue; + + MadeChange |= ProcessInstruction(I, ExitBlocks); + } + } + + assert(L->isLCSSAForm(*DT)); + PredCache.clear(); + + return MadeChange; +} + +/// isExitBlock - Return true if the specified block is in the list. +static bool isExitBlock(BasicBlock *BB, + const SmallVectorImpl<BasicBlock*> &ExitBlocks) { + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) + if (ExitBlocks[i] == BB) + return true; + return false; +} + +/// ProcessInstruction - Given an instruction in the loop, check to see if it +/// has any uses that are outside the current loop. If so, insert LCSSA PHI +/// nodes and rewrite the uses. +bool LCSSA::ProcessInstruction(Instruction *Inst, + const SmallVectorImpl<BasicBlock*> &ExitBlocks) { + SmallVector<Use*, 16> UsesToRewrite; + + BasicBlock *InstBB = Inst->getParent(); + + for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end(); + UI != E; ++UI) { + User *U = *UI; + BasicBlock *UserBB = cast<Instruction>(U)->getParent(); + if (PHINode *PN = dyn_cast<PHINode>(U)) + UserBB = PN->getIncomingBlock(UI); + + if (InstBB != UserBB && !inLoop(UserBB)) + UsesToRewrite.push_back(&UI.getUse()); + } + + // If there are no uses outside the loop, exit with no change. + if (UsesToRewrite.empty()) return false; + + ++NumLCSSA; // We are applying the transformation + + // Invoke instructions are special in that their result value is not available + // along their unwind edge. The code below tests to see whether DomBB dominates + // the value, so adjust DomBB to the normal destination block, which is + // effectively where the value is first usable. + BasicBlock *DomBB = Inst->getParent(); + if (InvokeInst *Inv = dyn_cast<InvokeInst>(Inst)) + DomBB = Inv->getNormalDest(); + + DomTreeNode *DomNode = DT->getNode(DomBB); + + SmallVector<PHINode*, 16> AddedPHIs; + + SSAUpdater SSAUpdate; + SSAUpdate.Initialize(Inst->getType(), Inst->getName()); + + // Insert the LCSSA phi's into all of the exit blocks dominated by the + // value, and add them to the Phi's map. + for (SmallVectorImpl<BasicBlock*>::const_iterator BBI = ExitBlocks.begin(), + BBE = ExitBlocks.end(); BBI != BBE; ++BBI) { + BasicBlock *ExitBB = *BBI; + if (!DT->dominates(DomNode, DT->getNode(ExitBB))) continue; + + // If we already inserted something for this BB, don't reprocess it. + if (SSAUpdate.HasValueForBlock(ExitBB)) continue; + + PHINode *PN = PHINode::Create(Inst->getType(), + PredCache.GetNumPreds(ExitBB), + Inst->getName()+".lcssa", + ExitBB->begin()); + + // Add inputs from inside the loop for this PHI. 
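+ // For illustration: an exit block reached from two in-loop predecessors
+ // %latch and %exiting gets "%v.lcssa = phi [ %v, %latch ], [ %v, %exiting ]"
+ // (hypothetical names).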
+ for (BasicBlock **PI = PredCache.GetPreds(ExitBB); *PI; ++PI) { + PN->addIncoming(Inst, *PI); + + // If the exit block has a predecessor not within the loop, arrange for + // the incoming value use corresponding to that predecessor to be + // rewritten in terms of a different LCSSA PHI. + if (!inLoop(*PI)) + UsesToRewrite.push_back( + &PN->getOperandUse( + PN->getOperandNumForIncomingValue(PN->getNumIncomingValues()-1))); + } + + AddedPHIs.push_back(PN); + + // Remember that this phi makes the value alive in this block. + SSAUpdate.AddAvailableValue(ExitBB, PN); + } + + // Rewrite all uses outside the loop in terms of the new PHIs we just + // inserted. + for (unsigned i = 0, e = UsesToRewrite.size(); i != e; ++i) { + // If this use is in an exit block, rewrite to use the newly inserted PHI. + // This is required for correctness because SSAUpdate doesn't handle uses in + // the same block. It assumes the PHI we inserted is at the end of the + // block. + Instruction *User = cast<Instruction>(UsesToRewrite[i]->getUser()); + BasicBlock *UserBB = User->getParent(); + if (PHINode *PN = dyn_cast<PHINode>(User)) + UserBB = PN->getIncomingBlock(*UsesToRewrite[i]); + + if (isa<PHINode>(UserBB->begin()) && + isExitBlock(UserBB, ExitBlocks)) { + UsesToRewrite[i]->set(UserBB->begin()); + continue; + } + + // Otherwise, do full PHI insertion. + SSAUpdate.RewriteUse(*UsesToRewrite[i]); + } + + // Remove PHI nodes that did not have any uses rewritten. + for (unsigned i = 0, e = AddedPHIs.size(); i != e; ++i) { + if (AddedPHIs[i]->use_empty()) + AddedPHIs[i]->eraseFromParent(); + } + + return true; +} + diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp new file mode 100644 index 0000000..7034feb --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp @@ -0,0 +1,888 @@ +//===-- Local.cpp - Functions to perform local transformations ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This family of functions perform various local transformations to the +// program. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Constants.h" +#include "llvm/GlobalAlias.h" +#include "llvm/GlobalVariable.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Metadata.h" +#include "llvm/Operator.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Analysis/DIBuilder.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/ProfileInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/IRBuilder.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/ValueHandle.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Local constant propagation. 
+//
+
+/// ConstantFoldTerminator - If a terminator instruction is predicated on a
+/// constant value, convert it into an unconditional branch to the constant
+/// destination. This is a nontrivial operation because the successors of this
+/// basic block must have their PHI nodes updated.
+/// Also calls RecursivelyDeleteTriviallyDeadInstructions() on any branch/switch
+/// conditions and indirectbr addresses this might make dead if
+/// DeleteDeadConditions is true.
+bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) {
+ TerminatorInst *T = BB->getTerminator();
+ IRBuilder<> Builder(T);
+
+ // Branch - See if we are conditionally jumping on a constant.
+ if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
+ if (BI->isUnconditional()) return false; // Can't optimize uncond branch
+ BasicBlock *Dest1 = BI->getSuccessor(0);
+ BasicBlock *Dest2 = BI->getSuccessor(1);
+
+ if (ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition())) {
+ // Are we branching on a constant?
+ // YES. Change to unconditional branch...
+ BasicBlock *Destination = Cond->getZExtValue() ? Dest1 : Dest2;
+ BasicBlock *OldDest = Cond->getZExtValue() ? Dest2 : Dest1;
+
+ //cerr << "Function: " << T->getParent()->getParent()
+ // << "\nRemoving branch from " << T->getParent()
+ // << "\n\nTo: " << OldDest << endl;
+
+ // Let the basic block know that we are letting go of it. Based on this,
+ // it will adjust its PHI nodes.
+ OldDest->removePredecessor(BB);
+
+ // Replace the conditional branch with an unconditional one.
+ Builder.CreateBr(Destination);
+ BI->eraseFromParent();
+ return true;
+ }
+
+ if (Dest2 == Dest1) { // Conditional branch to the same location?
+ // This branch matches something like this:
+ // br bool %cond, label %Dest, label %Dest
+ // and changes it into: br label %Dest
+
+ // Let the basic block know that we are letting go of one copy of it.
+ assert(BI->getParent() && "Terminator not inserted in block!");
+ Dest1->removePredecessor(BI->getParent());
+
+ // Replace the conditional branch with an unconditional one.
+ Builder.CreateBr(Dest1);
+ Value *Cond = BI->getCondition();
+ BI->eraseFromParent();
+ if (DeleteDeadConditions)
+ RecursivelyDeleteTriviallyDeadInstructions(Cond);
+ return true;
+ }
+ return false;
+ }
+
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
+ // If we are switching on a constant, we can convert the switch into a
+ // single branch instruction!
+ ConstantInt *CI = dyn_cast<ConstantInt>(SI->getCondition());
+ BasicBlock *TheOnlyDest = SI->getSuccessor(0); // The default dest
+ BasicBlock *DefaultDest = TheOnlyDest;
+ assert(TheOnlyDest == SI->getDefaultDest() &&
+ "Default destination is not successor #0?");
+
+ // Figure out which case it goes to.
+ for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i) {
+ // Found case matching a constant operand?
+ if (SI->getSuccessorValue(i) == CI) {
+ TheOnlyDest = SI->getSuccessor(i);
+ break;
+ }
+
+ // Check to see if this branch is going to the same place as the default
+ // dest. If so, eliminate it as an explicit compare.
+ if (SI->getSuccessor(i) == DefaultDest) {
+ // Remove this entry.
+ DefaultDest->removePredecessor(SI->getParent());
+ SI->removeCase(i);
+ --i; --e; // Don't skip an entry...
+ continue;
+ }
+
+ // Otherwise, check to see if the switch only branches to one destination.
+ // We do this by resetting "TheOnlyDest" to null when we find two non-equal
+ // destinations.
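+ // For illustration: a switch whose every case (and the default) branches
+ // to %bb collapses to "br label %bb"; the moment two distinct targets are
+ // seen, TheOnlyDest becomes null and no fold happens (hypothetical names).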
+ if (SI->getSuccessor(i) != TheOnlyDest) TheOnlyDest = 0;
+ }
+
+ if (CI && !TheOnlyDest) {
+ // Branching on a constant, but not on any of the cases; go to the default
+ // successor.
+ TheOnlyDest = SI->getDefaultDest();
+ }
+
+ // If we found a single destination that we can fold the switch into, do so
+ // now.
+ if (TheOnlyDest) {
+ // Insert the new branch.
+ Builder.CreateBr(TheOnlyDest);
+ BasicBlock *BB = SI->getParent();
+
+ // Remove entries from PHI nodes which we no longer branch to...
+ for (unsigned i = 0, e = SI->getNumSuccessors(); i != e; ++i) {
+ BasicBlock *Succ = SI->getSuccessor(i);
+ if (Succ == TheOnlyDest)
+ TheOnlyDest = 0; // Don't modify the first branch to TheOnlyDest
+ else
+ Succ->removePredecessor(BB);
+ }
+
+ // Delete the old switch.
+ Value *Cond = SI->getCondition();
+ SI->eraseFromParent();
+ if (DeleteDeadConditions)
+ RecursivelyDeleteTriviallyDeadInstructions(Cond);
+ return true;
+ }
+
+ if (SI->getNumSuccessors() == 2) {
+ // Otherwise, we can fold this switch into a conditional branch
+ // instruction if it has only one non-default destination.
+ Value *Cond = Builder.CreateICmpEQ(SI->getCondition(),
+ SI->getSuccessorValue(1), "cond");
+
+ // Insert the new branch.
+ Builder.CreateCondBr(Cond, SI->getSuccessor(1), SI->getSuccessor(0));
+
+ // Delete the old switch.
+ SI->eraseFromParent();
+ return true;
+ }
+ return false;
+ }
+
+ if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(T)) {
+ // indirectbr blockaddress(@F, @BB) -> br label @BB
+ if (BlockAddress *BA =
+ dyn_cast<BlockAddress>(IBI->getAddress()->stripPointerCasts())) {
+ BasicBlock *TheOnlyDest = BA->getBasicBlock();
+ // Insert the new branch.
+ Builder.CreateBr(TheOnlyDest);
+
+ for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
+ if (IBI->getDestination(i) == TheOnlyDest)
+ TheOnlyDest = 0;
+ else
+ IBI->getDestination(i)->removePredecessor(IBI->getParent());
+ }
+ Value *Address = IBI->getAddress();
+ IBI->eraseFromParent();
+ if (DeleteDeadConditions)
+ RecursivelyDeleteTriviallyDeadInstructions(Address);
+
+ // If we didn't find our destination in the IBI successor list, then we
+ // have undefined behavior. Replace the unconditional branch with an
+ // 'unreachable' instruction.
+ if (TheOnlyDest) {
+ BB->getTerminator()->eraseFromParent();
+ new UnreachableInst(BB->getContext(), BB);
+ }
+
+ return true;
+ }
+ }
+
+ return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Local dead code elimination.
+//
+
+/// isInstructionTriviallyDead - Return true if the result produced by the
+/// instruction is not used, and the instruction has no side effects.
+///
+bool llvm::isInstructionTriviallyDead(Instruction *I) {
+ if (!I->use_empty() || isa<TerminatorInst>(I)) return false;
+
+ // We don't want the landingpad instruction removed by anything this general.
+ if (isa<LandingPadInst>(I))
+ return false;
+
+ // We don't want debug info removed by anything this general, unless
+ // debug info is empty.
+ if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(I)) {
+ if (DDI->getAddress())
+ return false;
+ return true;
+ }
+ if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(I)) {
+ if (DVI->getValue())
+ return false;
+ return true;
+ }
+
+ if (!I->mayHaveSideEffects()) return true;
+
+ // Special case intrinsics that "may have side effects" but can be deleted
+ // when dead.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ // Safe to delete llvm.stacksave if dead.
+ if (II->getIntrinsicID() == Intrinsic::stacksave)
+ return true;
+
+ // Lifetime intrinsics are dead when their right-hand operand is undef.
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end)
+ return isa<UndefValue>(II->getArgOperand(1));
+ }
+ return false;
+}
+
+/// RecursivelyDeleteTriviallyDeadInstructions - If the specified value is a
+/// trivially dead instruction, delete it. If that makes any of its operands
+/// trivially dead, delete them too, recursively. Return true if any
+/// instructions were deleted.
+bool llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I || !I->use_empty() || !isInstructionTriviallyDead(I))
+ return false;
+
+ SmallVector<Instruction*, 16> DeadInsts;
+ DeadInsts.push_back(I);
+
+ do {
+ I = DeadInsts.pop_back_val();
+
+ // Null out all of the instruction's operands to see if any operand becomes
+ // dead as we go.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ Value *OpV = I->getOperand(i);
+ I->setOperand(i, 0);
+
+ if (!OpV->use_empty()) continue;
+
+ // If the operand is an instruction that became dead as we nulled out the
+ // operand, and if it is 'trivially' dead, delete it in a future loop
+ // iteration.
+ if (Instruction *OpI = dyn_cast<Instruction>(OpV))
+ if (isInstructionTriviallyDead(OpI))
+ DeadInsts.push_back(OpI);
+ }
+
+ I->eraseFromParent();
+ } while (!DeadInsts.empty());
+
+ return true;
+}
+
+/// areAllUsesEqual - Check whether the uses of a value are all the same.
+/// This is similar to Instruction::hasOneUse() except this will also return
+/// true when there are no uses or multiple uses that all refer to the same
+/// value.
+static bool areAllUsesEqual(Instruction *I) {
+ Value::use_iterator UI = I->use_begin();
+ Value::use_iterator UE = I->use_end();
+ if (UI == UE)
+ return true;
+
+ User *TheUse = *UI;
+ for (++UI; UI != UE; ++UI) {
+ if (*UI != TheUse)
+ return false;
+ }
+ return true;
+}
+
+/// RecursivelyDeleteDeadPHINode - If the specified value is an effectively
+/// dead PHI node, due to being a def-use chain of single-use nodes that
+/// either forms a cycle or is terminated by a trivially dead instruction,
+/// delete it. If that makes any of its operands trivially dead, delete them
+/// too, recursively. Return true if a change was made.
+bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) {
+ SmallPtrSet<Instruction*, 4> Visited;
+ for (Instruction *I = PN; areAllUsesEqual(I) && !I->mayHaveSideEffects();
+ I = cast<Instruction>(*I->use_begin())) {
+ if (I->use_empty())
+ return RecursivelyDeleteTriviallyDeadInstructions(I);
+
+ // If we find an instruction more than once, we're on a cycle that
+ // won't prove fruitful.
+ if (!Visited.insert(I)) {
+ // Break the cycle and delete the instruction and its operands.
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ (void)RecursivelyDeleteTriviallyDeadInstructions(I);
+ return true;
+ }
+ }
+ return false;
+}
+
+/// SimplifyInstructionsInBlock - Scan the specified basic block and try to
+/// simplify any instructions in it and recursively delete dead instructions.
+///
+/// This returns true if it changed the code; note that it can delete
+/// instructions in other blocks as well as in this block.
+bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD) {
+ bool MadeChange = false;
+ for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) {
+ Instruction *Inst = BI++;
+
+ if (Value *V = SimplifyInstruction(Inst, TD)) {
+ WeakVH BIHandle(BI);
+ ReplaceAndSimplifyAllUses(Inst, V, TD);
+ MadeChange = true;
+ if (BIHandle != BI)
+ BI = BB->begin();
+ continue;
+ }
+
+ if (Inst->isTerminator())
+ break;
+
+ WeakVH BIHandle(BI);
+ MadeChange |= RecursivelyDeleteTriviallyDeadInstructions(Inst);
+ if (BIHandle != BI)
+ BI = BB->begin();
+ }
+ return MadeChange;
+}
+
+//===----------------------------------------------------------------------===//
+// Control Flow Graph Restructuring.
+//
+
+
+/// RemovePredecessorAndSimplify - Like BasicBlock::removePredecessor, this
+/// method is called when we're about to delete Pred as a predecessor of BB. If
+/// BB contains any PHI nodes, this drops the entries in the PHI nodes for Pred.
+///
+/// Unlike the removePredecessor method, this attempts to simplify uses of PHI
+/// nodes that collapse into identity values. For example, if we have:
+/// x = phi(1, 0, 0, 0)
+/// y = and x, z
+///
+/// ... and delete the predecessor corresponding to the '1', this will attempt
+/// to recursively fold the 'and' to 0.
+void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
+ TargetData *TD) {
+ // This only adjusts blocks with PHI nodes.
+ if (!isa<PHINode>(BB->begin()))
+ return;
+
+ // Remove the entries for Pred from the PHI nodes in BB, but do not simplify
+ // them down. This will leave us with single entry phi nodes and other phis
+ // that can be removed.
+ BB->removePredecessor(Pred, true);
+
+ WeakVH PhiIt = &BB->front();
+ while (PHINode *PN = dyn_cast<PHINode>(PhiIt)) {
+ PhiIt = &*++BasicBlock::iterator(cast<Instruction>(PhiIt));
+
+ Value *PNV = SimplifyInstruction(PN, TD);
+ if (PNV == 0) continue;
+
+ // If we're able to simplify the phi to a single value, substitute the new
+ // value into all of its uses.
+ assert(PNV != PN && "SimplifyInstruction broken!");
+
+ Value *OldPhiIt = PhiIt;
+ ReplaceAndSimplifyAllUses(PN, PNV, TD);
+
+ // If recursive simplification ended up deleting the next PHI node we would
+ // iterate to, then our iterator is invalid; restart scanning from the top
+ // of the block.
+ if (PhiIt != OldPhiIt) PhiIt = &BB->front();
+ }
+}
+
+
+/// MergeBasicBlockIntoOnlyPred - DestBB is a block with one predecessor and its
+/// predecessor is known to have one successor (DestBB!). Eliminate the edge
+/// between them, moving the instructions in the predecessor into DestBB and
+/// deleting the predecessor block.
+///
+void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
+ // If BB has single-entry PHI nodes, fold them.
+ while (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) {
+ Value *NewVal = PN->getIncomingValue(0);
+ // Replace a self-referencing PHI with undef; it must be dead.
+ if (NewVal == PN) NewVal = UndefValue::get(PN->getType());
+ PN->replaceAllUsesWith(NewVal);
+ PN->eraseFromParent();
+ }
+
+ BasicBlock *PredBB = DestBB->getSinglePredecessor();
+ assert(PredBB && "Block doesn't have a single predecessor!");
+
+ // Zap anything that took the address of DestBB. Not doing this will give the
+ // address an invalid value.
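+ // For illustration: a use of "blockaddress(@f, %destbb)" is rewritten
+ // below to the constant "inttoptr (i32 1 to i8*)", a dummy value that is
+ // never a valid target (hypothetical names).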
+ if (DestBB->hasAddressTaken()) {
+ BlockAddress *BA = BlockAddress::get(DestBB);
+ Constant *Replacement =
+ ConstantInt::get(llvm::Type::getInt32Ty(BA->getContext()), 1);
+ BA->replaceAllUsesWith(ConstantExpr::getIntToPtr(Replacement,
+ BA->getType()));
+ BA->destroyConstant();
+ }
+
+ // Anything that branched to PredBB now branches to DestBB.
+ PredBB->replaceAllUsesWith(DestBB);
+
+ // Splice all the instructions from PredBB to DestBB.
+ PredBB->getTerminator()->eraseFromParent();
+ DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList());
+
+ if (P) {
+ DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>();
+ if (DT) {
+ BasicBlock *PredBBIDom = DT->getNode(PredBB)->getIDom()->getBlock();
+ DT->changeImmediateDominator(DestBB, PredBBIDom);
+ DT->eraseNode(PredBB);
+ }
+ ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>();
+ if (PI) {
+ PI->replaceAllUses(PredBB, DestBB);
+ PI->removeEdge(ProfileInfo::getEdge(PredBB, DestBB));
+ }
+ }
+ // Nuke BB.
+ PredBB->eraseFromParent();
+}
+
+/// CanPropagatePredecessorsForPHIs - Return true if we can fold BB, an
+/// almost-empty BB ending in an unconditional branch to Succ, into Succ.
+///
+/// Assumption: Succ is the single successor for BB.
+///
+static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
+ assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!");
+
+ DEBUG(dbgs() << "Looking to fold " << BB->getName() << " into "
+ << Succ->getName() << "\n");
+ // Shortcut: if there is only a single predecessor, it must be BB and merging
+ // is always safe.
+ if (Succ->getSinglePredecessor()) return true;
+
+ // Make a list of the predecessors of BB.
+ typedef SmallPtrSet<BasicBlock*, 16> BlockSet;
+ BlockSet BBPreds(pred_begin(BB), pred_end(BB));
+
+ // Use that list to make another list of common predecessors of BB and Succ.
+ BlockSet CommonPreds;
+ for (pred_iterator PI = pred_begin(Succ), PE = pred_end(Succ);
+ PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ if (BBPreds.count(P))
+ CommonPreds.insert(P);
+ }
+
+ // Shortcut: if there are no common predecessors, merging is always safe.
+ if (CommonPreds.empty())
+ return true;
+
+ // Look at all the phi nodes in Succ, to see if they present a conflict when
+ // merging these blocks.
+ for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+
+ // If the incoming value from BB is again a PHINode in
+ // BB which has the same incoming value for *PI as PN does, we can
+ // merge the phi nodes and then the blocks can still be merged.
+ PHINode *BBPN = dyn_cast<PHINode>(PN->getIncomingValueForBlock(BB));
+ if (BBPN && BBPN->getParent() == BB) {
+ for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end();
+ PI != PE; PI++) {
+ if (BBPN->getIncomingValueForBlock(*PI)
+ != PN->getIncomingValueForBlock(*PI)) {
+ DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in "
+ << Succ->getName() << " is conflicting with "
+ << BBPN->getName() << " with regard to common predecessor "
+ << (*PI)->getName() << "\n");
+ return false;
+ }
+ }
+ } else {
+ Value* Val = PN->getIncomingValueForBlock(BB);
+ for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end();
+ PI != PE; PI++) {
+ // See if the incoming value for the common predecessor is equal to the
+ // one for BB, in which case this phi node will not prevent the merging
+ // of the block.
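+ // For illustration: if Succ has "phi [ %v1, %BB ], [ %v2, %P ]" for a
+ // predecessor %P common to BB and Succ, and %v1 != %v2, folding BB would
+ // force one phi slot to hold both values (hypothetical names).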
+ if (Val != PN->getIncomingValueForBlock(*PI)) {
+ DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in "
+ << Succ->getName() << " is conflicting with regard to common "
+ << "predecessor " << (*PI)->getName() << "\n");
+ return false;
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
+/// TryToSimplifyUncondBranchFromEmptyBlock - BB is known to contain an
+/// unconditional branch, and contains no instructions other than PHI nodes,
+/// potentially side-effect-free intrinsics and the branch. If possible,
+/// eliminate BB by rewriting all the predecessors to branch to the successor
+/// block and return true. If we can't transform, return false.
+bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {
+ assert(BB != &BB->getParent()->getEntryBlock() &&
+ "TryToSimplifyUncondBranchFromEmptyBlock called on entry block!");
+
+ // We can't eliminate infinite loops.
+ BasicBlock *Succ = cast<BranchInst>(BB->getTerminator())->getSuccessor(0);
+ if (BB == Succ) return false;
+
+ // Check to see if merging these blocks would cause conflicts for any of the
+ // phi nodes in BB or Succ. If not, we can safely merge.
+ if (!CanPropagatePredecessorsForPHIs(BB, Succ)) return false;
+
+ // Check for cases where Succ has multiple predecessors and a PHI node in BB
+ // has uses which will not disappear when the PHI nodes are merged. It is
+ // possible to handle such cases, but difficult: it requires checking whether
+ // BB dominates Succ, which is non-trivial to calculate in the case where
+ // Succ has multiple predecessors. Also, it requires checking whether
+ // constructing the necessary self-referential PHI node doesn't introduce any
+ // conflicts; this isn't too difficult, but the previous code for doing this
+ // was incorrect.
+ //
+ // Note that if this check finds a live use, BB dominates Succ, so BB is
+ // something like a loop pre-header (or rarely, a part of an irreducible CFG);
+ // folding the branch isn't profitable in that case anyway.
+ if (!Succ->getSinglePredecessor()) {
+ BasicBlock::iterator BBI = BB->begin();
+ while (isa<PHINode>(*BBI)) {
+ for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end();
+ UI != E; ++UI) {
+ if (PHINode* PN = dyn_cast<PHINode>(*UI)) {
+ if (PN->getIncomingBlock(UI) != BB)
+ return false;
+ } else {
+ return false;
+ }
+ }
+ ++BBI;
+ }
+ }
+
+ DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB);
+
+ if (isa<PHINode>(Succ->begin())) {
+ // If there is more than one pred of Succ, and there are PHI nodes in
+ // the successor, then we need to add incoming edges for the PHI nodes.
+ //
+ const SmallVector<BasicBlock*, 16> BBPreds(pred_begin(BB), pred_end(BB));
+
+ // Loop over all of the PHI nodes in the successor of BB.
+ for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ Value *OldVal = PN->removeIncomingValue(BB, false);
+ assert(OldVal && "No entry in PHI for Pred BB!");
+
+ // If this incoming value is one of the PHI nodes in BB, the new entries
+ // in the PHI node are the entries from the old PHI.
+ if (isa<PHINode>(OldVal) && cast<PHINode>(OldVal)->getParent() == BB) {
+ PHINode *OldValPN = cast<PHINode>(OldVal);
+ for (unsigned i = 0, e = OldValPN->getNumIncomingValues(); i != e; ++i)
+ // Note that, since we are merging phi nodes and BB and Succ might
+ // have common predecessors, we could end up with a phi node with
+ // identical incoming branches.
This will be cleaned up later (and + // will trigger asserts if we try to clean it up now, without also + // simplifying the corresponding conditional branch). + PN->addIncoming(OldValPN->getIncomingValue(i), + OldValPN->getIncomingBlock(i)); + } else { + // Add an incoming value for each of the new incoming values. + for (unsigned i = 0, e = BBPreds.size(); i != e; ++i) + PN->addIncoming(OldVal, BBPreds[i]); + } + } + } + + if (Succ->getSinglePredecessor()) { + // BB is the only predecessor of Succ, so Succ will end up with exactly + // the same predecessors BB had. + + // Copy over any phi, debug or lifetime instruction. + BB->getTerminator()->eraseFromParent(); + Succ->getInstList().splice(Succ->getFirstNonPHI(), BB->getInstList()); + } else { + while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) { + // We explicitly check for such uses in CanPropagatePredecessorsForPHIs. + assert(PN->use_empty() && "There shouldn't be any uses here!"); + PN->eraseFromParent(); + } + } + + // Everything that jumped to BB now goes to Succ. + BB->replaceAllUsesWith(Succ); + if (!Succ->hasName()) Succ->takeName(BB); + BB->eraseFromParent(); // Delete the old basic block. + return true; +} + +/// EliminateDuplicatePHINodes - Check for and eliminate duplicate PHI +/// nodes in this block. This doesn't try to be clever about PHI nodes +/// which differ only in the order of the incoming values, but instcombine +/// orders them so it usually won't matter. +/// +bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { + bool Changed = false; + + // This implementation doesn't currently consider undef operands + // specially. Theoretically, two phis which are identical except for + // one having an undef where the other doesn't could be collapsed. + + // Map from PHI hash values to PHI nodes. If multiple PHIs have + // the same hash value, the element is the first PHI in the + // linked list in CollisionMap. + DenseMap<uintptr_t, PHINode *> HashMap; + + // Maintain linked lists of PHI nodes with common hash values. + DenseMap<PHINode *, PHINode *> CollisionMap; + + // Examine each PHI. + for (BasicBlock::iterator I = BB->begin(); + PHINode *PN = dyn_cast<PHINode>(I++); ) { + // Compute a hash value on the operands. Instcombine will likely have sorted + // them, which helps expose duplicates, but we have to check all the + // operands to be safe in case instcombine hasn't run. + uintptr_t Hash = 0; + // This hash algorithm is quite weak as hash functions go, but it seems + // to do a good enough job for this particular purpose, and is very quick. + for (User::op_iterator I = PN->op_begin(), E = PN->op_end(); I != E; ++I) { + Hash ^= reinterpret_cast<uintptr_t>(static_cast<Value *>(*I)); + Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7)); + } + for (PHINode::block_iterator I = PN->block_begin(), E = PN->block_end(); + I != E; ++I) { + Hash ^= reinterpret_cast<uintptr_t>(static_cast<BasicBlock *>(*I)); + Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7)); + } + // Avoid colliding with the DenseMap sentinels ~0 and ~0-1. + Hash >>= 1; + // If we've never seen this hash value before, it's a unique PHI. + std::pair<DenseMap<uintptr_t, PHINode *>::iterator, bool> Pair = + HashMap.insert(std::make_pair(Hash, PN)); + if (Pair.second) continue; + // Otherwise it's either a duplicate or a hash collision. + for (PHINode *OtherPN = Pair.first->second; ; ) { + if (OtherPN->isIdenticalTo(PN)) { + // A duplicate. Replace this PHI with its duplicate. 
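+ // For illustration: two nodes "phi i32 [ 0, %a ], [ 1, %b ]" with the same
+ // operands and incoming blocks hash identically and compare identical, so
+ // the later one is replaced by the first (hypothetical names).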
+
+/// enforceKnownAlignment - If the specified pointer points to an object that
+/// we control, modify the object's alignment to PrefAlign. This isn't
+/// often possible though. If alignment is important, a more reliable approach
+/// is to simply align all global variables and allocation instructions to
+/// their preferred alignment from the beginning.
+///
+static unsigned enforceKnownAlignment(Value *V, unsigned Align,
+                                      unsigned PrefAlign,
+                                      const TargetData *TD) {
+  V = V->stripPointerCasts();
+
+  if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+    // If the preferred alignment is greater than the natural stack alignment
+    // then don't round up. This avoids dynamic stack realignment.
+    if (TD && TD->exceedsNaturalStackAlignment(PrefAlign))
+      return Align;
+    // If there is a requested alignment and if this is an alloca, round up.
+    if (AI->getAlignment() >= PrefAlign)
+      return AI->getAlignment();
+    AI->setAlignment(PrefAlign);
+    return PrefAlign;
+  }
+
+  if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+    // If there is a large requested alignment and we can, bump up the
+    // alignment of the global.
+    if (GV->isDeclaration()) return Align;
+
+    if (GV->getAlignment() >= PrefAlign)
+      return GV->getAlignment();
+    // We can only increase the alignment of the global if it has no alignment
+    // specified or if it is not assigned a section. If it is assigned a
+    // section, the global could be densely packed with other objects in the
+    // section, and increasing the alignment could cause padding issues.
+    if (!GV->hasSection() || GV->getAlignment() == 0)
+      GV->setAlignment(PrefAlign);
+    return GV->getAlignment();
+  }
+
+  return Align;
+}
+
+/// getOrEnforceKnownAlignment - If the specified pointer has an alignment that
+/// we can determine, return it, otherwise return 0. If PrefAlign is specified,
+/// and it is more than the alignment of the ultimate object, see if we can
+/// increase the alignment of the ultimate object, making this check succeed.
+unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
+                                          const TargetData *TD) {
+  assert(V->getType()->isPointerTy() &&
+         "getOrEnforceKnownAlignment expects a pointer!");
+  unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64;
+  APInt Mask = APInt::getAllOnesValue(BitWidth);
+  APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+  ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD);
+  unsigned TrailZ = KnownZero.countTrailingOnes();
+
+  // Avoid trouble with ridiculously large TrailZ values, such as
+  // those computed from a null pointer.
+  TrailZ = std::min(TrailZ, unsigned(sizeof(unsigned) * CHAR_BIT - 1));
+
+  unsigned Align = 1u << std::min(BitWidth - 1, TrailZ);
+
+  // LLVM doesn't support alignments larger than this currently.
+  Align = std::min(Align, +Value::MaximumAlignment);
+
+  if (PrefAlign > Align)
+    Align = enforceKnownAlignment(V, Align, PrefAlign, TD);
+
+  // We don't need to make any adjustment.
+  return Align;
+}
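
The step from known-zero bits to a provable alignment is just a count of trailing one-bits in the mask. A standalone sketch of that arithmetic follows (plain integers in place of APInt; the caps mirror the ones above, but the function itself is illustrative, not LLVM code):

#include <algorithm>
#include <climits>
#include <iostream>
#include <stdint.h>

// KnownZero is a mask whose set bits are address bits proven to be zero.
// Each known-zero low bit doubles the alignment we may claim.
static unsigned alignFromKnownZero(uint64_t KnownZero) {
  unsigned TrailZ = 0;
  while (TrailZ < 64 && (KnownZero & (1ULL << TrailZ)) != 0)
    ++TrailZ;
  // Cap the shift, as the code above does, so "1u << TrailZ" is defined.
  TrailZ = std::min(TrailZ, unsigned(sizeof(unsigned) * CHAR_BIT - 1));
  return 1u << TrailZ;
}

int main() {
  std::cout << alignFromKnownZero(0xF) << '\n'; // low 4 bits zero -> 16
  std::cout << alignFromKnownZero(0x7) << '\n'; // low 3 bits zero -> 8
  std::cout << alignFromKnownZero(0x6) << '\n'; // bit 0 unknown   -> 1
  return 0;
}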
+
+///===---------------------------------------------------------------------===//
+///  Dbg Intrinsic utilities
+///
+
+/// Inserts an llvm.dbg.value intrinsic before a store to an alloca'd value
+/// that has an associated llvm.dbg.declare intrinsic.
+bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
+                                           StoreInst *SI, DIBuilder &Builder) {
+  DIVariable DIVar(DDI->getVariable());
+  if (!DIVar.Verify())
+    return false;
+
+  Instruction *DbgVal = NULL;
+  // If an argument is zero extended then use the argument directly. The ZExt
+  // may be zapped by an optimization pass in the future.
+  Argument *ExtendedArg = NULL;
+  if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0)))
+    ExtendedArg = dyn_cast<Argument>(ZExt->getOperand(0));
+  if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
+    ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0));
+  if (ExtendedArg)
+    DbgVal = Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, SI);
+  else
+    DbgVal = Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar, SI);
+
+  // Propagate any debug metadata from the store onto the dbg.value.
+  DebugLoc SIDL = SI->getDebugLoc();
+  if (!SIDL.isUnknown())
+    DbgVal->setDebugLoc(SIDL);
+  // Otherwise propagate debug metadata from dbg.declare.
+  else
+    DbgVal->setDebugLoc(DDI->getDebugLoc());
+  return true;
+}
+
+/// Inserts an llvm.dbg.value intrinsic before a load of an alloca'd value
+/// that has an associated llvm.dbg.declare intrinsic.
+bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
+                                           LoadInst *LI, DIBuilder &Builder) {
+  DIVariable DIVar(DDI->getVariable());
+  if (!DIVar.Verify())
+    return false;
+
+  Instruction *DbgVal =
+    Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0,
+                                    DIVar, LI);
+
+  // Propagate any debug metadata from the load onto the dbg.value.
+  DebugLoc LIDL = LI->getDebugLoc();
+  if (!LIDL.isUnknown())
+    DbgVal->setDebugLoc(LIDL);
+  // Otherwise propagate debug metadata from dbg.declare.
+  else
+    DbgVal->setDebugLoc(DDI->getDebugLoc());
+  return true;
+}
+
+/// LowerDbgDeclare - Lowers llvm.dbg.declare intrinsics into the appropriate
+/// set of llvm.dbg.value intrinsics.
+bool llvm::LowerDbgDeclare(Function &F) {
+  DIBuilder DIB(*F.getParent());
+  SmallVector<DbgDeclareInst *, 4> Dbgs;
+  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
+    for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ++BI) {
+      if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI))
+        Dbgs.push_back(DDI);
+    }
+  if (Dbgs.empty())
+    return false;
+
+  for (SmallVector<DbgDeclareInst *, 4>::iterator I = Dbgs.begin(),
+         E = Dbgs.end(); I != E; ++I) {
+    DbgDeclareInst *DDI = *I;
+    if (AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress())) {
+      bool RemoveDDI = true;
+      for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
+           UI != E; ++UI)
+        if (StoreInst *SI = dyn_cast<StoreInst>(*UI))
+          ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
+        else if (LoadInst *LI = dyn_cast<LoadInst>(*UI))
+          ConvertDebugDeclareToDebugValue(DDI, LI, DIB);
+        else
+          RemoveDDI = false;
+      if (RemoveDDI)
+        DDI->eraseFromParent();
+    }
+  }
+  return true;
+}
+
+/// FindAllocaDbgDeclare - Finds the llvm.dbg.declare intrinsic describing the
+/// alloca 'V', if any.
+DbgDeclareInst *llvm::FindAllocaDbgDeclare(Value *V) { + if (MDNode *DebugNode = MDNode::getIfExists(V->getContext(), V)) + for (Value::use_iterator UI = DebugNode->use_begin(), + E = DebugNode->use_end(); UI != E; ++UI) + if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI)) + return DDI; + + return 0; +} diff --git a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp new file mode 100644 index 0000000..cbd54a8 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp @@ -0,0 +1,776 @@ +//===- LoopSimplify.cpp - Loop Canonicalization Pass ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass performs several transformations to transform natural loops into a +// simpler form, which makes subsequent analyses and transformations simpler and +// more effective. +// +// Loop pre-header insertion guarantees that there is a single, non-critical +// entry edge from outside of the loop to the loop header. This simplifies a +// number of analyses and transformations, such as LICM. +// +// Loop exit-block insertion guarantees that all exit blocks from the loop +// (blocks which are outside of the loop that have predecessors inside of the +// loop) only have predecessors from inside of the loop (and are thus dominated +// by the loop header). This simplifies transformations such as store-sinking +// that are built into LICM. +// +// This pass also guarantees that loops will have exactly one backedge. +// +// Indirectbr instructions introduce several complications. If the loop +// contains or is entered by an indirectbr instruction, it may not be possible +// to transform the loop and make these guarantees. Client code should check +// that these conditions are true before relying on them. +// +// Note that the simplifycfg pass will clean up blocks which are split out but +// end up being unnecessary, so usage of this pass should not pessimize +// generated code. +// +// This pass obviously modifies the CFG, but updates loop information and +// dominator information. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "loop-simplify" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Constants.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Function.h" +#include "llvm/LLVMContext.h" +#include "llvm/Type.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/DepthFirstIterator.h" +using namespace llvm; + +STATISTIC(NumInserted, "Number of pre-header or exit blocks inserted"); +STATISTIC(NumNested , "Number of nested loops split out"); + +namespace { + struct LoopSimplify : public LoopPass { + static char ID; // Pass identification, replacement for typeid + LoopSimplify() : LoopPass(ID) { + initializeLoopSimplifyPass(*PassRegistry::getPassRegistry()); + } + + // AA - If we have an alias analysis object to update, this is it, otherwise + // this is null. + AliasAnalysis *AA; + LoopInfo *LI; + DominatorTree *DT; + ScalarEvolution *SE; + Loop *L; + virtual bool runOnLoop(Loop *L, LPPassManager &LPM); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + // We need loop information to identify the loops... + AU.addRequired<DominatorTree>(); + AU.addPreserved<DominatorTree>(); + + AU.addRequired<LoopInfo>(); + AU.addPreserved<LoopInfo>(); + + AU.addPreserved<AliasAnalysis>(); + AU.addPreserved<ScalarEvolution>(); + AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added. + } + + /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees. + void verifyAnalysis() const; + + private: + bool ProcessLoop(Loop *L, LPPassManager &LPM); + BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit); + BasicBlock *InsertPreheaderForLoop(Loop *L); + Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM); + BasicBlock *InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader); + void PlaceSplitBlockCarefully(BasicBlock *NewBB, + SmallVectorImpl<BasicBlock*> &SplitPreds, + Loop *L); + }; +} + +char LoopSimplify::ID = 0; +INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify", + "Canonicalize natural loops", true, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_END(LoopSimplify, "loop-simplify", + "Canonicalize natural loops", true, false) + +// Publicly exposed interface to pass... +char &llvm::LoopSimplifyID = LoopSimplify::ID; +Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); } + +/// runOnLoop - Run down all loops in the CFG (recursively, but we could do +/// it in any convenient order) inserting preheaders... +/// +bool LoopSimplify::runOnLoop(Loop *l, LPPassManager &LPM) { + L = l; + bool Changed = false; + LI = &getAnalysis<LoopInfo>(); + AA = getAnalysisIfAvailable<AliasAnalysis>(); + DT = &getAnalysis<DominatorTree>(); + SE = getAnalysisIfAvailable<ScalarEvolution>(); + + Changed |= ProcessLoop(L, LPM); + + return Changed; +} + +/// ProcessLoop - Walk the loop structure in depth first order, ensuring that +/// all loops have preheaders. 
+/// +bool LoopSimplify::ProcessLoop(Loop *L, LPPassManager &LPM) { + bool Changed = false; +ReprocessLoop: + + // Check to see that no blocks (other than the header) in this loop have + // predecessors that are not in the loop. This is not valid for natural + // loops, but can occur if the blocks are unreachable. Since they are + // unreachable we can just shamelessly delete those CFG edges! + for (Loop::block_iterator BB = L->block_begin(), E = L->block_end(); + BB != E; ++BB) { + if (*BB == L->getHeader()) continue; + + SmallPtrSet<BasicBlock*, 4> BadPreds; + for (pred_iterator PI = pred_begin(*BB), + PE = pred_end(*BB); PI != PE; ++PI) { + BasicBlock *P = *PI; + if (!L->contains(P)) + BadPreds.insert(P); + } + + // Delete each unique out-of-loop (and thus dead) predecessor. + for (SmallPtrSet<BasicBlock*, 4>::iterator I = BadPreds.begin(), + E = BadPreds.end(); I != E; ++I) { + + DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor " + << (*I)->getName() << "\n"); + + // Inform each successor of each dead pred. + for (succ_iterator SI = succ_begin(*I), SE = succ_end(*I); SI != SE; ++SI) + (*SI)->removePredecessor(*I); + // Zap the dead pred's terminator and replace it with unreachable. + TerminatorInst *TI = (*I)->getTerminator(); + TI->replaceAllUsesWith(UndefValue::get(TI->getType())); + (*I)->getTerminator()->eraseFromParent(); + new UnreachableInst((*I)->getContext(), *I); + Changed = true; + } + } + + // If there are exiting blocks with branches on undef, resolve the undef in + // the direction which will exit the loop. This will help simplify loop + // trip count computations. + SmallVector<BasicBlock*, 8> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(), + E = ExitingBlocks.end(); I != E; ++I) + if (BranchInst *BI = dyn_cast<BranchInst>((*I)->getTerminator())) + if (BI->isConditional()) { + if (UndefValue *Cond = dyn_cast<UndefValue>(BI->getCondition())) { + + DEBUG(dbgs() << "LoopSimplify: Resolving \"br i1 undef\" to exit in " + << (*I)->getName() << "\n"); + + BI->setCondition(ConstantInt::get(Cond->getType(), + !L->contains(BI->getSuccessor(0)))); + Changed = true; + } + } + + // Does the loop already have a preheader? If so, don't insert one. + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) { + Preheader = InsertPreheaderForLoop(L); + if (Preheader) { + ++NumInserted; + Changed = true; + } + } + + // Next, check to make sure that all exit nodes of the loop only have + // predecessors that are inside of the loop. This check guarantees that the + // loop preheader/header will dominate the exit blocks. If the exit block has + // predecessors from outside of the loop, split the edge now. + SmallVector<BasicBlock*, 8> ExitBlocks; + L->getExitBlocks(ExitBlocks); + + SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(), + ExitBlocks.end()); + for (SmallSetVector<BasicBlock *, 8>::iterator I = ExitBlockSet.begin(), + E = ExitBlockSet.end(); I != E; ++I) { + BasicBlock *ExitBlock = *I; + for (pred_iterator PI = pred_begin(ExitBlock), PE = pred_end(ExitBlock); + PI != PE; ++PI) + // Must be exactly this loop: no subloops, parent loops, or non-loop preds + // allowed. + if (!L->contains(*PI)) { + if (RewriteLoopExitBlock(L, ExitBlock)) { + ++NumInserted; + Changed = true; + } + break; + } + } + + // If the header has more than two predecessors at this point (from the + // preheader and from multiple backedges), we must adjust the loop. 
+ BasicBlock *LoopLatch = L->getLoopLatch(); + if (!LoopLatch) { + // If this is really a nested loop, rip it out into a child loop. Don't do + // this for loops with a giant number of backedges, just factor them into a + // common backedge instead. + if (L->getNumBackEdges() < 8) { + if (SeparateNestedLoop(L, LPM)) { + ++NumNested; + // This is a big restructuring change, reprocess the whole loop. + Changed = true; + // GCC doesn't tail recursion eliminate this. + goto ReprocessLoop; + } + } + + // If we either couldn't, or didn't want to, identify nesting of the loops, + // insert a new block that all backedges target, then make it jump to the + // loop header. + LoopLatch = InsertUniqueBackedgeBlock(L, Preheader); + if (LoopLatch) { + ++NumInserted; + Changed = true; + } + } + + // Scan over the PHI nodes in the loop header. Since they now have only two + // incoming values (the loop is canonicalized), we may have simplified the PHI + // down to 'X = phi [X, Y]', which should be replaced with 'Y'. + PHINode *PN; + for (BasicBlock::iterator I = L->getHeader()->begin(); + (PN = dyn_cast<PHINode>(I++)); ) + if (Value *V = SimplifyInstruction(PN, 0, DT)) { + if (AA) AA->deleteValue(PN); + if (SE) SE->forgetValue(PN); + PN->replaceAllUsesWith(V); + PN->eraseFromParent(); + } + + // If this loop has multiple exits and the exits all go to the same + // block, attempt to merge the exits. This helps several passes, such + // as LoopRotation, which do not support loops with multiple exits. + // SimplifyCFG also does this (and this code uses the same utility + // function), however this code is loop-aware, where SimplifyCFG is + // not. That gives it the advantage of being able to hoist + // loop-invariant instructions out of the way to open up more + // opportunities, and the disadvantage of having the responsibility + // to preserve dominator information. + bool UniqueExit = true; + if (!ExitBlocks.empty()) + for (unsigned i = 1, e = ExitBlocks.size(); i != e; ++i) + if (ExitBlocks[i] != ExitBlocks[0]) { + UniqueExit = false; + break; + } + if (UniqueExit) { + for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { + BasicBlock *ExitingBlock = ExitingBlocks[i]; + if (!ExitingBlock->getSinglePredecessor()) continue; + BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); + if (!BI || !BI->isConditional()) continue; + CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition()); + if (!CI || CI->getParent() != ExitingBlock) continue; + + // Attempt to hoist out all instructions except for the + // comparison and the branch. + bool AllInvariant = true; + for (BasicBlock::iterator I = ExitingBlock->begin(); &*I != BI; ) { + Instruction *Inst = I++; + // Skip debug info intrinsics. + if (isa<DbgInfoIntrinsic>(Inst)) + continue; + if (Inst == CI) + continue; + if (!L->makeLoopInvariant(Inst, Changed, + Preheader ? Preheader->getTerminator() : 0)) { + AllInvariant = false; + break; + } + } + if (!AllInvariant) continue; + + // The block has now been cleared of all instructions except for + // a comparison and a conditional branch. SimplifyCFG may be able + // to fold it now. + if (!FoldBranchToCommonDest(BI)) continue; + + // Success. The block is now dead, so remove it from the loop, + // update the dominator tree and delete it. + DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block " + << ExitingBlock->getName() << "\n"); + + // If any reachable control flow within this loop has changed, notify + // ScalarEvolution. 
Currently assume the parent loop doesn't change
+      // (splitting edges doesn't count). If blocks, CFG edges, or other
+      // values in the parent loop change, then we would need to call
+      // forgetLoop() for the parent instead.
+      if (SE)
+        SE->forgetLoop(L);
+
+      assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock));
+      Changed = true;
+      LI->removeBlock(ExitingBlock);
+
+      DomTreeNode *Node = DT->getNode(ExitingBlock);
+      const std::vector<DomTreeNodeBase<BasicBlock> *> &Children =
+        Node->getChildren();
+      while (!Children.empty()) {
+        DomTreeNode *Child = Children.front();
+        DT->changeImmediateDominator(Child, Node->getIDom());
+      }
+      DT->eraseNode(ExitingBlock);
+
+      BI->getSuccessor(0)->removePredecessor(ExitingBlock);
+      BI->getSuccessor(1)->removePredecessor(ExitingBlock);
+      ExitingBlock->eraseFromParent();
+    }
+  }
+
+  return Changed;
+}
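
After ProcessLoop succeeds, the loop has the canonical shape the pass advertises: one edge into the header from outside (the preheader), one backedge (the latch), and, indirectbr aside, dedicated exits. The toy checker below restates the first two properties over a string-keyed predecessor map; it is illustrative only, and in LLVM the real queries are Loop::getLoopPreheader() and Loop::getLoopLatch().

#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

typedef std::map<std::string, std::vector<std::string> > PredMap;

// One header predecessor from outside the loop <=> a preheader can exist.
// One header predecessor from inside the loop  <=> a unique latch exists.
static void countHeaderEdges(const PredMap &Preds,
                             const std::set<std::string> &LoopBlocks,
                             const std::string &Header,
                             unsigned &FromOutside, unsigned &FromInside) {
  FromOutside = FromInside = 0;
  const std::vector<std::string> &HP = Preds.find(Header)->second;
  for (size_t i = 0; i != HP.size(); ++i)
    if (LoopBlocks.count(HP[i]))
      ++FromInside;
    else
      ++FromOutside;
}

int main() {
  PredMap Preds;
  Preds["header"].push_back("preheader"); // single outside edge
  Preds["header"].push_back("latch");     // single backedge
  std::set<std::string> LoopBlocks;
  LoopBlocks.insert("header");
  LoopBlocks.insert("latch");

  unsigned Out, In;
  countHeaderEdges(Preds, LoopBlocks, "header", Out, In);
  std::cout << "outside edges: " << Out << ", latches: " << In << '\n';
  return 0; // prints 1 and 1: the simplified form
}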
+
+/// InsertPreheaderForLoop - Once we discover that a loop doesn't have a
+/// preheader, this method is called to insert one. This method has two phases:
+/// preheader insertion and analysis updating.
+///
+BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) {
+  BasicBlock *Header = L->getHeader();
+
+  // Compute the set of predecessors of the loop that are not in the loop.
+  SmallVector<BasicBlock*, 8> OutsideBlocks;
+  for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
+       PI != PE; ++PI) {
+    BasicBlock *P = *PI;
+    if (!L->contains(P)) {         // Coming in from outside the loop?
+      // If the loop is branched to from an indirect branch, we won't
+      // be able to fully transform the loop, because it prohibits
+      // edge splitting.
+      if (isa<IndirectBrInst>(P->getTerminator())) return 0;
+
+      // Keep track of it.
+      OutsideBlocks.push_back(P);
+    }
+  }
+
+  // Split out the loop pre-header.
+  BasicBlock *NewBB =
+    SplitBlockPredecessors(Header, &OutsideBlocks[0], OutsideBlocks.size(),
+                           ".preheader", this);
+
+  NewBB->getTerminator()->setDebugLoc(Header->getFirstNonPHI()->getDebugLoc());
+  DEBUG(dbgs() << "LoopSimplify: Creating pre-header " << NewBB->getName()
+        << "\n");
+
+  // Make sure that NewBB is put someplace intelligent, which doesn't mess up
+  // code layout too horribly.
+  PlaceSplitBlockCarefully(NewBB, OutsideBlocks, L);
+
+  return NewBB;
+}
+
+/// RewriteLoopExitBlock - Ensure that the loop preheader dominates all exit
+/// blocks. This method is used to split exit blocks that have predecessors
+/// outside of the loop.
+BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) {
+  SmallVector<BasicBlock*, 8> LoopBlocks;
+  for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) {
+    BasicBlock *P = *I;
+    if (L->contains(P)) {
+      // Don't do this if the loop is exited via an indirect branch.
+      if (isa<IndirectBrInst>(P->getTerminator())) return 0;
+
+      LoopBlocks.push_back(P);
+    }
+  }
+
+  assert(!LoopBlocks.empty() && "No edges coming in from outside the loop?");
+  BasicBlock *NewExitBB = 0;
+
+  if (Exit->isLandingPad()) {
+    SmallVector<BasicBlock*, 2> NewBBs;
+    SplitLandingPadPredecessors(Exit, ArrayRef<BasicBlock*>(&LoopBlocks[0],
+                                                            LoopBlocks.size()),
+                                ".loopexit", ".nonloopexit",
+                                this, NewBBs);
+    NewExitBB = NewBBs[0];
+  } else {
+    NewExitBB = SplitBlockPredecessors(Exit, &LoopBlocks[0],
+                                       LoopBlocks.size(), ".loopexit",
+                                       this);
+  }
+
+  DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
+        << NewExitBB->getName() << "\n");
+  return NewExitBB;
+}
+
+/// AddBlockAndPredsToSet - Add the specified block, and all of its
+/// predecessors, to the specified set, if it's not already in there. Stop
+/// predecessor traversal when we reach StopBlock.
+static void AddBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock,
+                                  std::set<BasicBlock*> &Blocks) {
+  std::vector<BasicBlock *> WorkList;
+  WorkList.push_back(InputBB);
+  do {
+    BasicBlock *BB = WorkList.back(); WorkList.pop_back();
+    if (Blocks.insert(BB).second && BB != StopBlock)
+      // If BB is not already processed and it is not a stop block then
+      // insert its predecessors into the work list.
+      for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
+        BasicBlock *WBB = *I;
+        WorkList.push_back(WBB);
+      }
+  } while (!WorkList.empty());
+}
+
+/// FindPHIToPartitionLoops - The first part of loop-nestification is to find a
+/// PHI node that tells us how to partition the loops.
+static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT,
+                                        AliasAnalysis *AA, LoopInfo *LI) {
+  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) {
+    PHINode *PN = cast<PHINode>(I);
+    ++I;
+    if (Value *V = SimplifyInstruction(PN, 0, DT)) {
+      // This is a degenerate PHI already, don't modify it!
+      PN->replaceAllUsesWith(V);
+      if (AA) AA->deleteValue(PN);
+      PN->eraseFromParent();
+      continue;
+    }
+
+    // Scan this PHI node looking for a use of the PHI node by itself.
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+      if (PN->getIncomingValue(i) == PN &&
+          L->contains(PN->getIncomingBlock(i)))
+        // We found something tasty to remove.
+        return PN;
+  }
+  return 0;
+}
+
+// PlaceSplitBlockCarefully - If the new block isn't already well placed, move
+// it right after some 'outside block' block. This prevents the preheader from
+// being placed inside the loop body, e.g. when the loop hasn't been rotated.
+void LoopSimplify::PlaceSplitBlockCarefully(BasicBlock *NewBB,
+                                       SmallVectorImpl<BasicBlock*> &SplitPreds,
+                                            Loop *L) {
+  // Check to see if NewBB is already well placed.
+  Function::iterator BBI = NewBB; --BBI;
+  for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
+    if (&*BBI == SplitPreds[i])
+      return;
+  }
+
+  // If it isn't already after an outside block, move it after one. This is
+  // always good as it makes the uncond branch from the outside block into a
+  // fall-through.
+
+  // Figure out *which* outside block to put this after. Prefer an outside
+  // block that neighbors a BB actually in the loop.
+  BasicBlock *FoundBB = 0;
+  for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
+    Function::iterator BBI = SplitPreds[i];
+    if (++BBI != NewBB->getParent()->end() &&
+        L->contains(BBI)) {
+      FoundBB = SplitPreds[i];
+      break;
+    }
+  }
+
+  // If our heuristic for a *good* bb to place this after doesn't find
+  // anything, just pick something. It's likely better than leaving it within
+  // the loop.
+  if (!FoundBB)
+    FoundBB = SplitPreds[0];
+  NewBB->moveAfter(FoundBB);
+}
+
+/// SeparateNestedLoop - If this loop has multiple backedges, try to pull one
+/// of them out into a nested loop. This is important for code that looks like
+/// this:
+///
+///  Loop:
+///     ...
+///   br cond, Loop, Next
+///     ...
+///   br cond2, Loop, Out
+///
+/// To identify this common case, we look at the PHI nodes in the header of
+/// the loop. PHI nodes with unchanging values on one backedge correspond to
+/// values that change in the "outer" loop, but not in the "inner" loop.
+///
+/// If we are able to separate out a loop, return the new outer loop that was
+/// created.
+/// +Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) { + PHINode *PN = FindPHIToPartitionLoops(L, DT, AA, LI); + if (PN == 0) return 0; // No known way to partition. + + // Pull out all predecessors that have varying values in the loop. This + // handles the case when a PHI node has multiple instances of itself as + // arguments. + SmallVector<BasicBlock*, 8> OuterLoopPreds; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingValue(i) != PN || + !L->contains(PN->getIncomingBlock(i))) { + // We can't split indirectbr edges. + if (isa<IndirectBrInst>(PN->getIncomingBlock(i)->getTerminator())) + return 0; + + OuterLoopPreds.push_back(PN->getIncomingBlock(i)); + } + + DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n"); + + // If ScalarEvolution is around and knows anything about values in + // this loop, tell it to forget them, because we're about to + // substantially change it. + if (SE) + SE->forgetLoop(L); + + BasicBlock *Header = L->getHeader(); + BasicBlock *NewBB = SplitBlockPredecessors(Header, &OuterLoopPreds[0], + OuterLoopPreds.size(), + ".outer", this); + + // Make sure that NewBB is put someplace intelligent, which doesn't mess up + // code layout too horribly. + PlaceSplitBlockCarefully(NewBB, OuterLoopPreds, L); + + // Create the new outer loop. + Loop *NewOuter = new Loop(); + + // Change the parent loop to use the outer loop as its child now. + if (Loop *Parent = L->getParentLoop()) + Parent->replaceChildLoopWith(L, NewOuter); + else + LI->changeTopLevelLoop(L, NewOuter); + + // L is now a subloop of our outer loop. + NewOuter->addChildLoop(L); + + // Add the new loop to the pass manager queue. + LPM.insertLoopIntoQueue(NewOuter); + + for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); + I != E; ++I) + NewOuter->addBlockEntry(*I); + + // Now reset the header in L, which had been moved by + // SplitBlockPredecessors for the outer loop. + L->moveToHeader(Header); + + // Determine which blocks should stay in L and which should be moved out to + // the Outer loop now. + std::set<BasicBlock*> BlocksInL; + for (pred_iterator PI=pred_begin(Header), E = pred_end(Header); PI!=E; ++PI) { + BasicBlock *P = *PI; + if (DT->dominates(Header, P)) + AddBlockAndPredsToSet(P, Header, BlocksInL); + } + + // Scan all of the loop children of L, moving them to OuterLoop if they are + // not part of the inner loop. + const std::vector<Loop*> &SubLoops = L->getSubLoops(); + for (size_t I = 0; I != SubLoops.size(); ) + if (BlocksInL.count(SubLoops[I]->getHeader())) + ++I; // Loop remains in L + else + NewOuter->addChildLoop(L->removeChildLoop(SubLoops.begin() + I)); + + // Now that we know which blocks are in L and which need to be moved to + // OuterLoop, move any blocks that need it. + for (unsigned i = 0; i != L->getBlocks().size(); ++i) { + BasicBlock *BB = L->getBlocks()[i]; + if (!BlocksInL.count(BB)) { + // Move this block to the parent, updating the exit blocks sets + L->removeBlockFromLoop(BB); + if ((*LI)[BB] == L) + LI->changeLoopFor(BB, NewOuter); + --i; + } + } + + return NewOuter; +} + + + +/// InsertUniqueBackedgeBlock - This method is called when the specified loop +/// has more than one backedge in it. If this occurs, revector all of these +/// backedges to target a new basic block and have that block branch to the loop +/// header. This ensures that loops have exactly one backedge. 
+///
+BasicBlock *
+LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) {
+  assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!");
+
+  // Get information about the loop
+  BasicBlock *Header = L->getHeader();
+  Function *F = Header->getParent();
+
+  // Unique backedge insertion currently depends on having a preheader.
+  if (!Preheader)
+    return 0;
+
+  // Figure out which basic blocks contain back-edges to the loop header.
+  std::vector<BasicBlock*> BackedgeBlocks;
+  for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){
+    BasicBlock *P = *I;
+
+    // Indirectbr edges cannot be split, so we must fail if we find one.
+    if (isa<IndirectBrInst>(P->getTerminator()))
+      return 0;
+
+    if (P != Preheader) BackedgeBlocks.push_back(P);
+  }
+
+  // Create and insert the new backedge block...
+  BasicBlock *BEBlock = BasicBlock::Create(Header->getContext(),
+                                           Header->getName()+".backedge", F);
+  BranchInst *BETerminator = BranchInst::Create(Header, BEBlock);
+
+  DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block "
+        << BEBlock->getName() << "\n");
+
+  // Move the new backedge block to right after the last backedge block.
+  Function::iterator InsertPos = BackedgeBlocks.back(); ++InsertPos;
+  F->getBasicBlockList().splice(InsertPos, F->getBasicBlockList(), BEBlock);
+
+  // Now that the block has been inserted into the function, create PHI nodes
+  // in the backedge block which correspond to any PHI nodes in the header
+  // block.
+  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
+    PHINode *PN = cast<PHINode>(I);
+    PHINode *NewPN = PHINode::Create(PN->getType(), BackedgeBlocks.size(),
+                                     PN->getName()+".be", BETerminator);
+    if (AA) AA->copyValue(PN, NewPN);
+
+    // Loop over the PHI node, moving all entries except the one for the
+    // preheader over to the new PHI node.
+    unsigned PreheaderIdx = ~0U;
+    bool HasUniqueIncomingValue = true;
+    Value *UniqueValue = 0;
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+      BasicBlock *IBB = PN->getIncomingBlock(i);
+      Value *IV = PN->getIncomingValue(i);
+      if (IBB == Preheader) {
+        PreheaderIdx = i;
+      } else {
+        NewPN->addIncoming(IV, IBB);
+        if (HasUniqueIncomingValue) {
+          if (UniqueValue == 0)
+            UniqueValue = IV;
+          else if (UniqueValue != IV)
+            HasUniqueIncomingValue = false;
+        }
+      }
+    }
+
+    // Delete all of the incoming values from the old PN except the preheader's
+    assert(PreheaderIdx != ~0U && "PHI has no preheader entry??");
+    if (PreheaderIdx != 0) {
+      PN->setIncomingValue(0, PN->getIncomingValue(PreheaderIdx));
+      PN->setIncomingBlock(0, PN->getIncomingBlock(PreheaderIdx));
+    }
+    // Nuke all entries except the zero'th.
+    for (unsigned i = 0, e = PN->getNumIncomingValues()-1; i != e; ++i)
+      PN->removeIncomingValue(e-i, false);
+
+    // Finally, add the newly constructed PHI node as the entry for the
+    // BEBlock.
+    PN->addIncoming(NewPN, BEBlock);
+
+    // As an optimization, if all incoming values in the new PhiNode (which is
+    // a subset of the incoming values of the old PHI node) have the same
+    // value, eliminate the PHI Node.
+    if (HasUniqueIncomingValue) {
+      NewPN->replaceAllUsesWith(UniqueValue);
+      if (AA) AA->deleteValue(NewPN);
+      BEBlock->getInstList().erase(NewPN);
+    }
+  }
+
+  // Now that all of the PHI nodes have been inserted and adjusted, modify the
+  // backedge blocks to branch to the BEBlock instead of the header.
+ for (unsigned i = 0, e = BackedgeBlocks.size(); i != e; ++i) { + TerminatorInst *TI = BackedgeBlocks[i]->getTerminator(); + for (unsigned Op = 0, e = TI->getNumSuccessors(); Op != e; ++Op) + if (TI->getSuccessor(Op) == Header) + TI->setSuccessor(Op, BEBlock); + } + + //===--- Update all analyses which we must preserve now -----------------===// + + // Update Loop Information - we know that this block is now in the current + // loop and all parent loops. + L->addBasicBlockToLoop(BEBlock, LI->getBase()); + + // Update dominator information + DT->splitBlock(BEBlock); + + return BEBlock; +} + +void LoopSimplify::verifyAnalysis() const { + // It used to be possible to just assert L->isLoopSimplifyForm(), however + // with the introduction of indirectbr, there are now cases where it's + // not possible to transform a loop as necessary. We can at least check + // that there is an indirectbr near any time there's trouble. + + // Indirectbr can interfere with preheader and unique backedge insertion. + if (!L->getLoopPreheader() || !L->getLoopLatch()) { + bool HasIndBrPred = false; + for (pred_iterator PI = pred_begin(L->getHeader()), + PE = pred_end(L->getHeader()); PI != PE; ++PI) + if (isa<IndirectBrInst>((*PI)->getTerminator())) { + HasIndBrPred = true; + break; + } + assert(HasIndBrPred && + "LoopSimplify has no excuse for missing loop header info!"); + (void)HasIndBrPred; + } + + // Indirectbr can interfere with exit block canonicalization. + if (!L->hasDedicatedExits()) { + bool HasIndBrExiting = false; + SmallVector<BasicBlock*, 8> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { + if (isa<IndirectBrInst>((ExitingBlocks[i])->getTerminator())) { + HasIndBrExiting = true; + break; + } + } + + assert(HasIndBrExiting && + "LoopSimplify has no excuse for missing exit block info!"); + (void)HasIndBrExiting; + } +} diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp new file mode 100644 index 0000000..62e4fa2 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -0,0 +1,425 @@ +//===-- UnrollLoop.cpp - Loop unrolling utilities -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements some loop unrolling utilities. It does not define any +// actual pass or policy, but provides a single function to perform loop +// unrolling. +// +// The process of unrolling can produce extraneous basic blocks linked with +// unconditional branches. This will be corrected in the future. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "loop-unroll" +#include "llvm/Transforms/Utils/UnrollLoop.h" +#include "llvm/BasicBlock.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LoopIterator.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/SimplifyIndVar.h" +using namespace llvm; + +// TODO: Should these be here or in LoopUnroll? 
+STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled"); +STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)"); + +/// RemapInstruction - Convert the instruction operands from referencing the +/// current values into those specified by VMap. +static inline void RemapInstruction(Instruction *I, + ValueToValueMapTy &VMap) { + for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) { + Value *Op = I->getOperand(op); + ValueToValueMapTy::iterator It = VMap.find(Op); + if (It != VMap.end()) + I->setOperand(op, It->second); + } + + if (PHINode *PN = dyn_cast<PHINode>(I)) { + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + ValueToValueMapTy::iterator It = VMap.find(PN->getIncomingBlock(i)); + if (It != VMap.end()) + PN->setIncomingBlock(i, cast<BasicBlock>(It->second)); + } + } +} + +/// FoldBlockIntoPredecessor - Folds a basic block into its predecessor if it +/// only has one predecessor, and that predecessor only has one successor. +/// The LoopInfo Analysis that is passed will be kept consistent. +/// Returns the new combined block. +static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, + LPPassManager *LPM) { + // Merge basic blocks into their predecessor if there is only one distinct + // pred, and if there is only one distinct successor of the predecessor, and + // if there are no PHI nodes. + BasicBlock *OnlyPred = BB->getSinglePredecessor(); + if (!OnlyPred) return 0; + + if (OnlyPred->getTerminator()->getNumSuccessors() != 1) + return 0; + + DEBUG(dbgs() << "Merging: " << *BB << "into: " << *OnlyPred); + + // Resolve any PHI nodes at the start of the block. They are all + // guaranteed to have exactly one entry if they exist, unless there are + // multiple duplicate (but guaranteed to be equal) entries for the + // incoming edges. This occurs when there are multiple edges from + // OnlyPred to OnlySucc. + FoldSingleEntryPHINodes(BB); + + // Delete the unconditional branch from the predecessor... + OnlyPred->getInstList().pop_back(); + + // Make all PHI nodes that referred to BB now refer to Pred as their + // source... + BB->replaceAllUsesWith(OnlyPred); + + // Move all definitions in the successor to the predecessor... + OnlyPred->getInstList().splice(OnlyPred->end(), BB->getInstList()); + + std::string OldName = BB->getName(); + + // Erase basic block from the function... + + // ScalarEvolution holds references to loop exit blocks. + if (ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>()) { + if (Loop *L = LI->getLoopFor(BB)) + SE->forgetLoop(L); + } + LI->removeBlock(BB); + BB->eraseFromParent(); + + // Inherit predecessor's name if it exists... + if (!OldName.empty() && !OnlyPred->hasName()) + OnlyPred->setName(OldName); + + return OnlyPred; +} + +/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true +/// if unrolling was successful, or false if the loop was unmodified. Unrolling +/// can only fail when the loop's latch block is not terminated by a conditional +/// branch instruction. However, if the trip count (and multiple) are not known, +/// loop unrolling will mostly produce more code that is no faster. +/// +/// TripCount is generally defined as the number of times the loop header +/// executes. UnrollLoop relaxes the definition to permit early exits: here +/// TripCount is the iteration on which control exits LatchBlock if no early +/// exits were taken. 
Note that UnrollLoop assumes that the loop counter test
+/// terminates LatchBlock in order to remove unnecessary instances of the
+/// test. In other words, control may exit the loop prior to TripCount
+/// iterations via an early branch, but control may not exit the loop from the
+/// LatchBlock's terminator prior to TripCount iterations.
+///
+/// Similarly, TripMultiple divides the number of times that the LatchBlock may
+/// execute without exiting the loop.
+///
+/// The LoopInfo Analysis that is passed will be kept consistent.
+///
+/// If a LoopPassManager is passed in, and the loop is fully removed, it will
+/// be removed from the LoopPassManager as well. LPM can also be NULL.
+///
+/// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are
+/// available it must also preserve those analyses.
+bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
+                      unsigned TripMultiple, LoopInfo *LI, LPPassManager *LPM) {
+  BasicBlock *Preheader = L->getLoopPreheader();
+  if (!Preheader) {
+    DEBUG(dbgs() << "  Can't unroll; loop preheader-insertion failed.\n");
+    return false;
+  }
+
+  BasicBlock *LatchBlock = L->getLoopLatch();
+  if (!LatchBlock) {
+    DEBUG(dbgs() << "  Can't unroll; loop exit-block-insertion failed.\n");
+    return false;
+  }
+
+  BasicBlock *Header = L->getHeader();
+  BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
+
+  if (!BI || BI->isUnconditional()) {
+    // The loop-rotate pass can be helpful to avoid this in many cases.
+    DEBUG(dbgs() <<
+          "  Can't unroll; loop not terminated by a conditional branch.\n");
+    return false;
+  }
+
+  if (Header->hasAddressTaken()) {
+    // The loop-rotate pass can be helpful to avoid this in many cases.
+    DEBUG(dbgs() <<
+          "  Won't unroll loop: address of header block is taken.\n");
+    return false;
+  }
+
+  // Notify ScalarEvolution that the loop will be substantially changed,
+  // if not outright eliminated.
+  ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
+  if (SE)
+    SE->forgetLoop(L);
+
+  if (TripCount != 0)
+    DEBUG(dbgs() << "  Trip Count = " << TripCount << "\n");
+  if (TripMultiple != 1)
+    DEBUG(dbgs() << "  Trip Multiple = " << TripMultiple << "\n");
+
+  // Effectively "DCE" unrolled iterations that are beyond the tripcount
+  // and will never be executed.
+  if (TripCount != 0 && Count > TripCount)
+    Count = TripCount;
+
+  assert(Count > 0);
+  assert(TripMultiple > 0);
+  assert(TripCount == 0 || TripCount % TripMultiple == 0);
+
+  // Are we eliminating the loop control altogether?
+  bool CompletelyUnroll = Count == TripCount;
+
+  // If we know the trip count, we know the multiple...
+  unsigned BreakoutTrip = 0;
+  if (TripCount != 0) {
+    BreakoutTrip = TripCount % Count;
+    TripMultiple = 0;
+  } else {
+    // Figure out what multiple to use.
+ BreakoutTrip = TripMultiple = + (unsigned)GreatestCommonDivisor64(Count, TripMultiple); + } + + if (CompletelyUnroll) { + DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName() + << " with trip count " << TripCount << "!\n"); + } else { + DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() + << " by " << Count); + if (TripMultiple == 0 || BreakoutTrip != TripMultiple) { + DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip); + } else if (TripMultiple != 1) { + DEBUG(dbgs() << " with " << TripMultiple << " trips per branch"); + } + DEBUG(dbgs() << "!\n"); + } + + std::vector<BasicBlock*> LoopBlocks = L->getBlocks(); + + bool ContinueOnTrue = L->contains(BI->getSuccessor(0)); + BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue); + + // For the first iteration of the loop, we should use the precloned values for + // PHI nodes. Insert associations now. + ValueToValueMapTy LastValueMap; + std::vector<PHINode*> OrigPHINode; + for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { + OrigPHINode.push_back(cast<PHINode>(I)); + } + + std::vector<BasicBlock*> Headers; + std::vector<BasicBlock*> Latches; + Headers.push_back(Header); + Latches.push_back(LatchBlock); + + // The current on-the-fly SSA update requires blocks to be processed in + // reverse postorder so that LastValueMap contains the correct value at each + // exit. + LoopBlocksDFS DFS(L); + DFS.perform(LI); + + // Stash the DFS iterators before adding blocks to the loop. + LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO(); + LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO(); + + for (unsigned It = 1; It != Count; ++It) { + std::vector<BasicBlock*> NewBlocks; + + for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { + ValueToValueMapTy VMap; + BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It)); + Header->getParent()->getBasicBlockList().push_back(New); + + // Loop over all of the PHI nodes in the block, changing them to use the + // incoming values from the previous block. + if (*BB == Header) + for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) { + PHINode *NewPHI = cast<PHINode>(VMap[OrigPHINode[i]]); + Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock); + if (Instruction *InValI = dyn_cast<Instruction>(InVal)) + if (It > 1 && L->contains(InValI)) + InVal = LastValueMap[InValI]; + VMap[OrigPHINode[i]] = InVal; + New->getInstList().erase(NewPHI); + } + + // Update our running map of newest clones + LastValueMap[*BB] = New; + for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end(); + VI != VE; ++VI) + LastValueMap[VI->first] = VI->second; + + L->addBasicBlockToLoop(New, LI->getBase()); + + // Add phi entries for newly created values to all exit blocks. + for (succ_iterator SI = succ_begin(*BB), SE = succ_end(*BB); + SI != SE; ++SI) { + if (L->contains(*SI)) + continue; + for (BasicBlock::iterator BBI = (*SI)->begin(); + PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) { + Value *Incoming = phi->getIncomingValueForBlock(*BB); + ValueToValueMapTy::iterator It = LastValueMap.find(Incoming); + if (It != LastValueMap.end()) + Incoming = It->second; + phi->addIncoming(Incoming, New); + } + } + // Keep track of new headers and latches as we create them, so that + // we can insert the proper branches later. 
+ if (*BB == Header) + Headers.push_back(New); + if (*BB == LatchBlock) + Latches.push_back(New); + + NewBlocks.push_back(New); + } + + // Remap all instructions in the most recent iteration + for (unsigned i = 0; i < NewBlocks.size(); ++i) + for (BasicBlock::iterator I = NewBlocks[i]->begin(), + E = NewBlocks[i]->end(); I != E; ++I) + ::RemapInstruction(I, LastValueMap); + } + + // Loop over the PHI nodes in the original block, setting incoming values. + for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) { + PHINode *PN = OrigPHINode[i]; + if (CompletelyUnroll) { + PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader)); + Header->getInstList().erase(PN); + } + else if (Count > 1) { + Value *InVal = PN->removeIncomingValue(LatchBlock, false); + // If this value was defined in the loop, take the value defined by the + // last iteration of the loop. + if (Instruction *InValI = dyn_cast<Instruction>(InVal)) { + if (L->contains(InValI)) + InVal = LastValueMap[InVal]; + } + assert(Latches.back() == LastValueMap[LatchBlock] && "bad last latch"); + PN->addIncoming(InVal, Latches.back()); + } + } + + // Now that all the basic blocks for the unrolled iterations are in place, + // set up the branches to connect them. + for (unsigned i = 0, e = Latches.size(); i != e; ++i) { + // The original branch was replicated in each unrolled iteration. + BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator()); + + // The branch destination. + unsigned j = (i + 1) % e; + BasicBlock *Dest = Headers[j]; + bool NeedConditional = true; + + // For a complete unroll, make the last iteration end with a branch + // to the exit block. + if (CompletelyUnroll && j == 0) { + Dest = LoopExit; + NeedConditional = false; + } + + // If we know the trip count or a multiple of it, we can safely use an + // unconditional branch for some iterations. + if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) { + NeedConditional = false; + } + + if (NeedConditional) { + // Update the conditional branch's successor for the following + // iteration. + Term->setSuccessor(!ContinueOnTrue, Dest); + } else { + // Remove phi operands at this loop exit + if (Dest != LoopExit) { + BasicBlock *BB = Latches[i]; + for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); + SI != SE; ++SI) { + if (*SI == Headers[i]) + continue; + for (BasicBlock::iterator BBI = (*SI)->begin(); + PHINode *Phi = dyn_cast<PHINode>(BBI); ++BBI) { + Phi->removeIncomingValue(BB, false); + } + } + } + // Replace the conditional branch with an unconditional one. + BranchInst::Create(Dest, Term); + Term->eraseFromParent(); + } + } + + // Merge adjacent basic blocks, if possible. + for (unsigned i = 0, e = Latches.size(); i != e; ++i) { + BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator()); + if (Term->isUnconditional()) { + BasicBlock *Dest = Term->getSuccessor(0); + if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, LPM)) + std::replace(Latches.begin(), Latches.end(), Dest, Fold); + } + } + + // FIXME: Reconstruct dom info, because it is not preserved properly. + // Incrementally updating domtree after loop unrolling would be easy. + if (DominatorTree *DT = LPM->getAnalysisIfAvailable<DominatorTree>()) + DT->runOnFunction(*L->getHeader()->getParent()); + + // Simplify any new induction variables in the partially unrolled loop. 
+  if (SE && !CompletelyUnroll) {
+    SmallVector<WeakVH, 16> DeadInsts;
+    simplifyLoopIVs(L, SE, LPM, DeadInsts);
+
+    // Aggressively clean up dead instructions that simplifyLoopIVs already
+    // identified. Any remaining should be cleaned up below.
+    while (!DeadInsts.empty())
+      if (Instruction *Inst =
+            dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
+        RecursivelyDeleteTriviallyDeadInstructions(Inst);
+  }
+
+  // At this point, the code is well formed. We now do a quick sweep over the
+  // inserted code, doing constant propagation and dead code elimination as we
+  // go.
+  const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks();
+  for (std::vector<BasicBlock*>::const_iterator BB = NewLoopBlocks.begin(),
+       BBE = NewLoopBlocks.end(); BB != BBE; ++BB)
+    for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ) {
+      Instruction *Inst = I++;
+
+      if (isInstructionTriviallyDead(Inst))
+        (*BB)->getInstList().erase(Inst);
+      else if (Value *V = SimplifyInstruction(Inst))
+        if (LI->replacementPreservesLCSSAForm(Inst, V)) {
+          Inst->replaceAllUsesWith(V);
+          (*BB)->getInstList().erase(Inst);
+        }
+    }
+
+  NumCompletelyUnrolled += CompletelyUnroll;
+  ++NumUnrolled;
+  // Remove the loop from the LoopPassManager if it's completely removed.
+  if (CompletelyUnroll && LPM != NULL)
+    LPM->deleteLoopFromQueue(L);
+
+  return true;
+}
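
Before moving on to the next file, the BreakoutTrip bookkeeping above is worth seeing in isolation. This sketch reproduces only the partial-unroll arithmetic that decides which unrolled copies keep a conditional exit branch; the input values are made up, and the complete-unroll special case is omitted.

#include <cstdio>

static unsigned gcd(unsigned A, unsigned B) {
  while (B) { unsigned T = A % B; A = B; B = T; }
  return A;
}

int main() {
  unsigned Count = 4;        // unroll factor
  unsigned TripCount = 0;    // 0: trip count unknown at compile time
  unsigned TripMultiple = 2; // trip count known to be a multiple of 2

  // Mirrors the logic above: with a known trip count the breakout copy is
  // TripCount % Count; otherwise fall back to gcd(Count, TripMultiple).
  unsigned BreakoutTrip;
  if (TripCount != 0) {
    BreakoutTrip = TripCount % Count;
    TripMultiple = 0;
  } else {
    BreakoutTrip = TripMultiple = gcd(Count, TripMultiple);
  }

  // A copy may drop its conditional branch only if control provably cannot
  // exit the loop at that copy's latch.
  for (unsigned j = 0; j != Count; ++j) {
    bool NeedConditional =
      !(j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0));
    std::printf("copy %u: %s branch\n", j,
                NeedConditional ? "conditional" : "unconditional");
  }
  return 0;
}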
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
new file mode 100644
index 0000000..61ab3f6
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
@@ -0,0 +1,166 @@
+#define DEBUG_TYPE "lower-expect-intrinsic"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Metadata.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include <vector>
+
+using namespace llvm;
+
+STATISTIC(IfHandled, "Number of 'expect' intrinsic instructions handled");
+
+static cl::opt<uint32_t>
+LikelyBranchWeight("likely-branch-weight", cl::Hidden, cl::init(64),
+  cl::desc("Weight of the branch likely to be taken (default = 64)"));
+static cl::opt<uint32_t>
+UnlikelyBranchWeight("unlikely-branch-weight", cl::Hidden, cl::init(4),
+  cl::desc("Weight of the branch unlikely to be taken (default = 4)"));
+
+namespace {
+
+  class LowerExpectIntrinsic : public FunctionPass {
+
+    bool HandleSwitchExpect(SwitchInst *SI);
+
+    bool HandleIfExpect(BranchInst *BI);
+
+  public:
+    static char ID;
+    LowerExpectIntrinsic() : FunctionPass(ID) {
+      initializeLowerExpectIntrinsicPass(*PassRegistry::getPassRegistry());
+    }
+
+    bool runOnFunction(Function &F);
+  };
+}
+
+
+bool LowerExpectIntrinsic::HandleSwitchExpect(SwitchInst *SI) {
+  CallInst *CI = dyn_cast<CallInst>(SI->getCondition());
+  if (!CI)
+    return false;
+
+  Function *Fn = CI->getCalledFunction();
+  if (!Fn || Fn->getIntrinsicID() != Intrinsic::expect)
+    return false;
+
+  Value *ArgValue = CI->getArgOperand(0);
+  ConstantInt *ExpectedValue = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+  if (!ExpectedValue)
+    return false;
+
+  LLVMContext &Context = CI->getContext();
+  Type *Int32Ty = Type::getInt32Ty(Context);
+
+  unsigned caseNo = SI->findCaseValue(ExpectedValue);
+  std::vector<Value *> Vec;
+  unsigned n = SI->getNumCases();
+  Vec.resize(n + 1); // +1 for MDString
+
+  Vec[0] = MDString::get(Context, "branch_weights");
+  for (unsigned i = 0; i < n; ++i) {
+    Vec[i + 1] = ConstantInt::get(Int32Ty, i == caseNo ? LikelyBranchWeight :
+                                                         UnlikelyBranchWeight);
+  }
+
+  MDNode *WeightsNode = llvm::MDNode::get(Context, Vec);
+  SI->setMetadata(LLVMContext::MD_prof, WeightsNode);
+
+  SI->setCondition(ArgValue);
+  return true;
+}
+
+
+bool LowerExpectIntrinsic::HandleIfExpect(BranchInst *BI) {
+  if (BI->isUnconditional())
+    return false;
+
+  // Handle non-optimized IR code like:
+  //   %expval = call i64 @llvm.expect.i64.i64(i64 %conv1, i64 1)
+  //   %tobool = icmp ne i64 %expval, 0
+  //   br i1 %tobool, label %if.then, label %if.end
+
+  ICmpInst *CmpI = dyn_cast<ICmpInst>(BI->getCondition());
+  if (!CmpI || CmpI->getPredicate() != CmpInst::ICMP_NE)
+    return false;
+
+  CallInst *CI = dyn_cast<CallInst>(CmpI->getOperand(0));
+  if (!CI)
+    return false;
+
+  Function *Fn = CI->getCalledFunction();
+  if (!Fn || Fn->getIntrinsicID() != Intrinsic::expect)
+    return false;
+
+  Value *ArgValue = CI->getArgOperand(0);
+  ConstantInt *ExpectedValue = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+  if (!ExpectedValue)
+    return false;
+
+  LLVMContext &Context = CI->getContext();
+  Type *Int32Ty = Type::getInt32Ty(Context);
+  bool Likely = ExpectedValue->isOne();
+
+  // If the expect value is equal to 1 it means that we are more likely to
+  // take branch 0; otherwise branch 1 is more likely.
+  Value *Ops[] = {
+    MDString::get(Context, "branch_weights"),
+    ConstantInt::get(Int32Ty, Likely ? LikelyBranchWeight : UnlikelyBranchWeight),
+    ConstantInt::get(Int32Ty, Likely ? UnlikelyBranchWeight : LikelyBranchWeight)
+  };
+
+  MDNode *WeightsNode = MDNode::get(Context, Ops);
+  BI->setMetadata(LLVMContext::MD_prof, WeightsNode);
+
+  CmpI->setOperand(0, ArgValue);
+  return true;
+}
+
+
+bool LowerExpectIntrinsic::runOnFunction(Function &F) {
+  for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
+    BasicBlock *BB = I++;
+
+    // Create "branch_weights" metadata.
+    if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
+      if (HandleIfExpect(BI))
+        IfHandled++;
+    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
+      if (HandleSwitchExpect(SI))
+        IfHandled++;
+    }
+
+    // Remove llvm.expect intrinsics.
+    for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
+         BI != BE; ) {
+      CallInst *CI = dyn_cast<CallInst>(BI++);
+      if (!CI)
+        continue;
+
+      Function *Fn = CI->getCalledFunction();
+      if (Fn && Fn->getIntrinsicID() == Intrinsic::expect) {
+        Value *Exp = CI->getArgOperand(0);
+        CI->replaceAllUsesWith(Exp);
+        CI->eraseFromParent();
+      }
+    }
+  }
+
+  return false;
+}
+
+
+char LowerExpectIntrinsic::ID = 0;
+INITIALIZE_PASS(LowerExpectIntrinsic, "lower-expect", "Lower 'expect' "
+                "Intrinsics", false, false)
+
+FunctionPass *llvm::createLowerExpectIntrinsicPass() {
+  return new LowerExpectIntrinsic();
+}
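
For reference, the direction-picking in HandleIfExpect reduces to a single swap. Below is a standalone sketch of just that weight policy, with the default weights copied from the cl::opts above (no metadata or LLVM types involved; the helper name is illustrative):

#include <cstdio>
#include <stdint.h>

// Defaults mirrored from -likely-branch-weight / -unlikely-branch-weight.
static const uint32_t LikelyBranchWeight = 64;
static const uint32_t UnlikelyBranchWeight = 4;

// For "br (icmp ne (llvm.expect(V, Expected)), 0)": when Expected is 1 the
// true edge (successor 0) is hot, otherwise the false edge is.
static void pickWeights(uint64_t Expected, uint32_t &W0, uint32_t &W1) {
  bool Likely = (Expected == 1);
  W0 = Likely ? LikelyBranchWeight : UnlikelyBranchWeight;
  W1 = Likely ? UnlikelyBranchWeight : LikelyBranchWeight;
}

int main() {
  uint32_t W0, W1;
  pickWeights(1, W0, W1);
  std::printf("expect 1 -> !{\"branch_weights\", %u, %u}\n", W0, W1);
  pickWeights(0, W0, W1);
  std::printf("expect 0 -> !{\"branch_weights\", %u, %u}\n", W0, W1);
  return 0;
}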
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp
new file mode 100644
index 0000000..c96c8fc
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp
@@ -0,0 +1,608 @@
+//===- LowerInvoke.cpp - Eliminate Invoke & Unwind instructions -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation is designed for use by code generators which do not yet
+// support stack unwinding. This pass supports two models of exception handling
+// lowering, the 'cheap' support and the 'expensive' support.
+//
+// 'Cheap' exception handling support gives the program the ability to execute
+// any program which does not "throw an exception", by turning 'invoke'
+// instructions into calls and by turning 'unwind' instructions into calls to
+// abort(). If the program does dynamically use the unwind instruction, the
+// program will print a message then abort.
+//
+// 'Expensive' exception handling support gives the full exception handling
+// support to the program at the cost of making the 'invoke' instruction
+// really expensive. It basically inserts setjmp/longjmp calls to emulate the
+// exception handling as necessary.
+//
+// Because the 'expensive' support slows down programs a lot, and EH is only
+// used for a subset of the programs, it must be specifically enabled by an
+// option.
+//
+// Note that after this pass runs the CFG is not entirely accurate (exceptional
+// control flow edges are not correct anymore) so only very simple things
+// should be done after the lowerinvoke pass has run (like generation of native
+// code). This should not be used as a general purpose "my LLVM-to-LLVM pass
+// doesn't support the invoke instruction yet" lowering pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "lowerinvoke"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetLowering.h"
+#include <csetjmp>
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumInvokes, "Number of invokes replaced");
+STATISTIC(NumUnwinds, "Number of unwinds replaced");
+STATISTIC(NumSpilled, "Number of registers live across unwind edges");
+
+static cl::opt<bool> ExpensiveEHSupport("enable-correct-eh-support",
+ cl::desc("Make the -lowerinvoke pass insert expensive, but correct, EH code"));
+
+namespace {
+  class LowerInvoke : public FunctionPass {
+    // Used for both models.
+    Constant *AbortFn;
+
+    // Used for expensive EH support.
+ StructType *JBLinkTy;
+ GlobalVariable *JBListHead;
+ Constant *SetJmpFn, *LongJmpFn, *StackSaveFn, *StackRestoreFn;
+ bool useExpensiveEHSupport;
+
+ // We peek in TLI to grab the target's jmp_buf size and alignment.
+ const TargetLowering *TLI;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit LowerInvoke(const TargetLowering *tli = NULL,
+ bool useExpensiveEHSupport = ExpensiveEHSupport)
+ : FunctionPass(ID), useExpensiveEHSupport(useExpensiveEHSupport),
+ TLI(tli) {
+ initializeLowerInvokePass(*PassRegistry::getPassRegistry());
+ }
+ bool doInitialization(Module &M);
+ bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ // This is a cluster of orthogonal Transforms
+ AU.addPreserved("mem2reg");
+ AU.addPreservedID(LowerSwitchID);
+ }
+
+ private:
+ bool insertCheapEHSupport(Function &F);
+ void splitLiveRangesLiveAcrossInvokes(SmallVectorImpl<InvokeInst*>&Invokes);
+ void rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo,
+ AllocaInst *InvokeNum, AllocaInst *StackPtr,
+ SwitchInst *CatchSwitch);
+ bool insertExpensiveEHSupport(Function &F);
+ };
+}
+
+char LowerInvoke::ID = 0;
+INITIALIZE_PASS(LowerInvoke, "lowerinvoke",
+ "Lower invoke and unwind, for unwindless code generators",
+ false, false)
+
+char &llvm::LowerInvokePassID = LowerInvoke::ID;
+
+// Public interface to the LowerInvoke pass.
+FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI) {
+ return new LowerInvoke(TLI, ExpensiveEHSupport);
+}
+FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI,
+ bool useExpensiveEHSupport) {
+ return new LowerInvoke(TLI, useExpensiveEHSupport);
+}
+
+// doInitialization - Make sure that there is a prototype for abort in the
+// current module.
+bool LowerInvoke::doInitialization(Module &M) {
+ Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
+ if (useExpensiveEHSupport) {
+ // Insert a type for the linked list of jump buffers.
+ unsigned JBSize = TLI ? TLI->getJumpBufSize() : 0;
+ JBSize = JBSize ? JBSize : 200;
+ Type *JmpBufTy = ArrayType::get(VoidPtrTy, JBSize);
+
+ JBLinkTy = StructType::create(M.getContext(), "llvm.sjljeh.jmpbufty");
+ Type *Elts[] = { JmpBufTy, PointerType::getUnqual(JBLinkTy) };
+ JBLinkTy->setBody(Elts);
+
+ Type *PtrJBList = PointerType::getUnqual(JBLinkTy);
+
+ // Now that we've done that, insert the jmpbuf list head global, unless it
+ // already exists.
+ if (!(JBListHead = M.getGlobalVariable("llvm.sjljeh.jblist", PtrJBList))) {
+ JBListHead = new GlobalVariable(M, PtrJBList, false,
+ GlobalValue::LinkOnceAnyLinkage,
+ Constant::getNullValue(PtrJBList),
+ "llvm.sjljeh.jblist");
+ }
+
+// VisualStudio defines setjmp as _setjmp
+#if defined(_MSC_VER) && defined(setjmp) && \
+ !defined(setjmp_undefined_for_msvc)
+# pragma push_macro("setjmp")
+# undef setjmp
+# define setjmp_undefined_for_msvc
+#endif
+
+ SetJmpFn = Intrinsic::getDeclaration(&M, Intrinsic::setjmp);
+
+#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
+ // let's return it to _setjmp state
+# pragma pop_macro("setjmp")
+# undef setjmp_undefined_for_msvc
+#endif
+
+ LongJmpFn = Intrinsic::getDeclaration(&M, Intrinsic::longjmp);
+ StackSaveFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave);
+ StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore);
+ }
+
+ // We need the 'abort' function for both models.
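+ // getOrInsertFunction gives us, if it is not already present, a
+ // declaration equivalent to this (illustrative IR):
+ //   declare void @abort()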
+ AbortFn = M.getOrInsertFunction("abort", Type::getVoidTy(M.getContext()), + (Type *)0); + return true; +} + +bool LowerInvoke::insertCheapEHSupport(Function &F) { + bool Changed = false; + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) { + SmallVector<Value*,16> CallArgs(II->op_begin(), II->op_end() - 3); + // Insert a normal call instruction... + CallInst *NewCall = CallInst::Create(II->getCalledValue(), + CallArgs, "", II); + NewCall->takeName(II); + NewCall->setCallingConv(II->getCallingConv()); + NewCall->setAttributes(II->getAttributes()); + NewCall->setDebugLoc(II->getDebugLoc()); + II->replaceAllUsesWith(NewCall); + + // Insert an unconditional branch to the normal destination. + BranchInst::Create(II->getNormalDest(), II); + + // Remove any PHI node entries from the exception destination. + II->getUnwindDest()->removePredecessor(BB); + + // Remove the invoke instruction now. + BB->getInstList().erase(II); + + ++NumInvokes; Changed = true; + } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) { + // Insert a call to abort() + CallInst::Create(AbortFn, "", UI)->setTailCall(); + + // Insert a return instruction. This really should be a "barrier", as it + // is unreachable. + ReturnInst::Create(F.getContext(), + F.getReturnType()->isVoidTy() ? + 0 : Constant::getNullValue(F.getReturnType()), UI); + + // Remove the unwind instruction now. + BB->getInstList().erase(UI); + + ++NumUnwinds; Changed = true; + } + return Changed; +} + +/// rewriteExpensiveInvoke - Insert code and hack the function to replace the +/// specified invoke instruction with a call. +void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo, + AllocaInst *InvokeNum, + AllocaInst *StackPtr, + SwitchInst *CatchSwitch) { + ConstantInt *InvokeNoC = ConstantInt::get(Type::getInt32Ty(II->getContext()), + InvokeNo); + + // If the unwind edge has phi nodes, split the edge. + if (isa<PHINode>(II->getUnwindDest()->begin())) { + SplitCriticalEdge(II, 1, this); + + // If there are any phi nodes left, they must have a single predecessor. + while (PHINode *PN = dyn_cast<PHINode>(II->getUnwindDest()->begin())) { + PN->replaceAllUsesWith(PN->getIncomingValue(0)); + PN->eraseFromParent(); + } + } + + // Insert a store of the invoke num before the invoke and store zero into the + // location afterward. + new StoreInst(InvokeNoC, InvokeNum, true, II); // volatile + + // Insert a store of the stack ptr before the invoke, so we can restore it + // later in the exception case. + CallInst* StackSaveRet = CallInst::Create(StackSaveFn, "ssret", II); + new StoreInst(StackSaveRet, StackPtr, true, II); // volatile + + BasicBlock::iterator NI = II->getNormalDest()->getFirstInsertionPt(); + // nonvolatile. + new StoreInst(Constant::getNullValue(Type::getInt32Ty(II->getContext())), + InvokeNum, false, NI); + + Instruction* StackPtrLoad = + new LoadInst(StackPtr, "stackptr.restore", true, + II->getUnwindDest()->getFirstInsertionPt()); + CallInst::Create(StackRestoreFn, StackPtrLoad, "")->insertAfter(StackPtrLoad); + + // Add a switch case to our unwind block. + CatchSwitch->addCase(InvokeNoC, II->getUnwindDest()); + + // Insert a normal call instruction. 
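+ // The net effect on the invoke itself (illustrative IR; names invented):
+ //   invoke void @f() to label %normal unwind label %lpad
+ // becomes
+ //   call void @f()
+ //   br label %normal
+ // with the volatile stores above and the switch case just added routing
+ // a longjmp back to %lpad via invoke number N.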
+ SmallVector<Value*,16> CallArgs(II->op_begin(), II->op_end() - 3);
+ CallInst *NewCall = CallInst::Create(II->getCalledValue(),
+ CallArgs, "", II);
+ NewCall->takeName(II);
+ NewCall->setCallingConv(II->getCallingConv());
+ NewCall->setAttributes(II->getAttributes());
+ NewCall->setDebugLoc(II->getDebugLoc());
+ II->replaceAllUsesWith(NewCall);
+
+ // Replace the invoke with an uncond branch.
+ BranchInst::Create(II->getNormalDest(), NewCall->getParent());
+ II->eraseFromParent();
+}
+
+/// MarkBlocksLiveIn - Insert BB and all of its predecessors into LiveBBs until
+/// we reach blocks we've already seen.
+static void MarkBlocksLiveIn(BasicBlock *BB, std::set<BasicBlock*> &LiveBBs) {
+ if (!LiveBBs.insert(BB).second) return; // already been here.
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ MarkBlocksLiveIn(*PI, LiveBBs);
+}
+
+// First thing we need to do is scan the whole function for values that are
+// live across unwind edges. Each value that is live across an unwind edge
+// we spill into a stack location, guaranteeing that there is nothing live
+// across the unwind edge. This process also splits all critical edges
+// coming out of invokes.
+void LowerInvoke::
+splitLiveRangesLiveAcrossInvokes(SmallVectorImpl<InvokeInst*> &Invokes) {
+ // First step, split all critical edges from invoke instructions.
+ for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
+ InvokeInst *II = Invokes[i];
+ SplitCriticalEdge(II, 0, this);
+ SplitCriticalEdge(II, 1, this);
+ assert(!isa<PHINode>(II->getNormalDest()->begin()) &&
+ !isa<PHINode>(II->getUnwindDest()->begin()) &&
+ "critical edge splitting left single entry phi nodes?");
+ }
+
+ Function *F = Invokes.back()->getParent()->getParent();
+
+ // To avoid having to handle incoming arguments specially, we lower each arg
+ // to a copy instruction in the entry block. This ensures that the argument
+ // value itself cannot be live across the entry block.
+ BasicBlock::iterator AfterAllocaInsertPt = F->begin()->begin();
+ while (isa<AllocaInst>(AfterAllocaInsertPt) &&
+ isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsertPt)->getArraySize()))
+ ++AfterAllocaInsertPt;
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
+ AI != E; ++AI) {
+ Type *Ty = AI->getType();
+ // Aggregate types can't be cast, but are legal argument types, so we have
+ // to handle them differently. We use an extract/insert pair as a
+ // lightweight method to achieve the same goal.
+ if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
+ Instruction *EI = ExtractValueInst::Create(AI, 0, "",AfterAllocaInsertPt);
+ Instruction *NI = InsertValueInst::Create(AI, EI, 0);
+ NI->insertAfter(EI);
+ AI->replaceAllUsesWith(NI);
+ // Set the operand of the instructions back to the argument.
+ EI->setOperand(0, AI);
+ NI->setOperand(0, AI);
+ } else {
+ // This is always a no-op cast because we're casting AI to AI->getType()
+ // so src and destination types are identical. BitCast is the only
+ // possibility.
+ CastInst *NC = new BitCastInst(
+ AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt);
+ AI->replaceAllUsesWith(NC);
+ // Set the operand of the cast instruction back to the argument.
+ // Normally it's forbidden to replace a CastInst's operand because it
+ // could cause the opcode to reflect an illegal conversion. However,
+ // we're replacing it here with the same value it was constructed with.
+ // We do this because the above replaceAllUsesWith() clobbered the + // operand, but we want this one to remain. + NC->setOperand(0, AI); + } + } + + // Finally, scan the code looking for instructions with bad live ranges. + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) + for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { + // Ignore obvious cases we don't have to handle. In particular, most + // instructions either have no uses or only have a single use inside the + // current block. Ignore them quickly. + Instruction *Inst = II; + if (Inst->use_empty()) continue; + if (Inst->hasOneUse() && + cast<Instruction>(Inst->use_back())->getParent() == BB && + !isa<PHINode>(Inst->use_back())) continue; + + // If this is an alloca in the entry block, it's not a real register + // value. + if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst)) + if (isa<ConstantInt>(AI->getArraySize()) && BB == F->begin()) + continue; + + // Avoid iterator invalidation by copying users to a temporary vector. + SmallVector<Instruction*,16> Users; + for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end(); + UI != E; ++UI) { + Instruction *User = cast<Instruction>(*UI); + if (User->getParent() != BB || isa<PHINode>(User)) + Users.push_back(User); + } + + // Scan all of the uses and see if the live range is live across an unwind + // edge. If we find a use live across an invoke edge, create an alloca + // and spill the value. + std::set<InvokeInst*> InvokesWithStoreInserted; + + // Find all of the blocks that this value is live in. + std::set<BasicBlock*> LiveBBs; + LiveBBs.insert(Inst->getParent()); + while (!Users.empty()) { + Instruction *U = Users.back(); + Users.pop_back(); + + if (!isa<PHINode>(U)) { + MarkBlocksLiveIn(U->getParent(), LiveBBs); + } else { + // Uses for a PHI node occur in their predecessor block. + PHINode *PN = cast<PHINode>(U); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingValue(i) == Inst) + MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs); + } + } + + // Now that we know all of the blocks that this thing is live in, see if + // it includes any of the unwind locations. + bool NeedsSpill = false; + for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { + BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest(); + if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) { + NeedsSpill = true; + } + } + + // If we decided we need a spill, do it. + if (NeedsSpill) { + ++NumSpilled; + DemoteRegToStack(*Inst, true); + } + } +} + +bool LowerInvoke::insertExpensiveEHSupport(Function &F) { + SmallVector<ReturnInst*,16> Returns; + SmallVector<UnwindInst*,16> Unwinds; + SmallVector<InvokeInst*,16> Invokes; + UnreachableInst* UnreachablePlaceholder = 0; + + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { + // Remember all return instructions in case we insert an invoke into this + // function. + Returns.push_back(RI); + } else if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) { + Invokes.push_back(II); + } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) { + Unwinds.push_back(UI); + } + + if (Unwinds.empty() && Invokes.empty()) return false; + + NumInvokes += Invokes.size(); + NumUnwinds += Unwinds.size(); + + // TODO: This is not an optimal way to do this. 
In particular, this always
+ // inserts setjmp calls into the entries of functions with invoke instructions
+ // even though there are possibly paths through the function that do not
+ // execute any invokes. In particular, for functions with early exits, e.g.
+ // the 'addMove' method in hexxagon, it would be nice to not have to do the
+ // setjmp stuff on the early exit path. This requires a bit of dataflow, but
+ // would not be too hard to do.
+
+ // If we have an invoke instruction, insert a setjmp that dominates all
+ // invokes. After the setjmp, use a cond branch that goes to the original
+ // code path on zero, and to a designated 'catch' block on nonzero.
+ Value *OldJmpBufPtr = 0;
+ if (!Invokes.empty()) {
+ // First thing we need to do is scan the whole function for values that are
+ // live across unwind edges. Each value that is live across an unwind edge
+ // we spill into a stack location, guaranteeing that there is nothing live
+ // across the unwind edge. This process also splits all critical edges
+ // coming out of invokes.
+ splitLiveRangesLiveAcrossInvokes(Invokes);
+
+ BasicBlock *EntryBB = F.begin();
+
+ // Create an alloca for the incoming jump buffer ptr and the new jump buffer
+ // that needs to be restored on all exits from the function. This is an
+ // alloca because the value needs to be live across invokes.
+ unsigned Align = TLI ? TLI->getJumpBufAlignment() : 0;
+ AllocaInst *JmpBuf =
+ new AllocaInst(JBLinkTy, 0, Align,
+ "jblink", F.begin()->begin());
+
+ Value *Idx[] = { Constant::getNullValue(Type::getInt32Ty(F.getContext())),
+ ConstantInt::get(Type::getInt32Ty(F.getContext()), 1) };
+ OldJmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx, "OldBuf",
+ EntryBB->getTerminator());
+
+ // Copy the JBListHead to the alloca.
+ Value *OldBuf = new LoadInst(JBListHead, "oldjmpbufptr", true,
+ EntryBB->getTerminator());
+ new StoreInst(OldBuf, OldJmpBufPtr, true, EntryBB->getTerminator());
+
+ // Add the new jumpbuf to the list.
+ new StoreInst(JmpBuf, JBListHead, true, EntryBB->getTerminator());
+
+ // Create the catch block. The catch block is basically a big switch
+ // statement that goes to all of the invoke catch blocks.
+ BasicBlock *CatchBB =
+ BasicBlock::Create(F.getContext(), "setjmp.catch", &F);
+
+ // Create an alloca which keeps track of the stack pointer before every
+ // invoke; this allows us to properly restore the stack pointer after
+ // long jumping.
+ AllocaInst *StackPtr = new AllocaInst(Type::getInt8PtrTy(F.getContext()), 0,
+ "stackptr", EntryBB->begin());
+
+ // Create an alloca which keeps track of which invoke is currently
+ // executing. For normal calls it contains zero.
+ AllocaInst *InvokeNum = new AllocaInst(Type::getInt32Ty(F.getContext()), 0,
+ "invokenum",EntryBB->begin());
+ new StoreInst(ConstantInt::get(Type::getInt32Ty(F.getContext()), 0),
+ InvokeNum, true, EntryBB->getTerminator());
+
+ // Insert a load in the Catch block, and a switch on its value. By default,
+ // we go to a block that just does an unwind (which is the correct action
+ // for a standard call). We insert an unreachable instruction here and
+ // modify the block to jump to the correct unwinding pad later.
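+ // The dispatch built here looks roughly like this (illustrative IR):
+ //   setjmp.catch:
+ //     %num = load volatile i32* %invokenum
+ //     switch i32 %num, label %unwindbb [ i32 1, label %lpad.1, ... ]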
+ BasicBlock *UnwindBB = BasicBlock::Create(F.getContext(), "unwindbb", &F); + UnreachablePlaceholder = new UnreachableInst(F.getContext(), UnwindBB); + + Value *CatchLoad = new LoadInst(InvokeNum, "invoke.num", true, CatchBB); + SwitchInst *CatchSwitch = + SwitchInst::Create(CatchLoad, UnwindBB, Invokes.size(), CatchBB); + + // Now that things are set up, insert the setjmp call itself. + + // Split the entry block to insert the conditional branch for the setjmp. + BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(), + "setjmp.cont"); + + Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 0); + Value *JmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx, "TheJmpBuf", + EntryBB->getTerminator()); + JmpBufPtr = new BitCastInst(JmpBufPtr, + Type::getInt8PtrTy(F.getContext()), + "tmp", EntryBB->getTerminator()); + Value *SJRet = CallInst::Create(SetJmpFn, JmpBufPtr, "sjret", + EntryBB->getTerminator()); + + // Compare the return value to zero. + Value *IsNormal = new ICmpInst(EntryBB->getTerminator(), + ICmpInst::ICMP_EQ, SJRet, + Constant::getNullValue(SJRet->getType()), + "notunwind"); + // Nuke the uncond branch. + EntryBB->getTerminator()->eraseFromParent(); + + // Put in a new condbranch in its place. + BranchInst::Create(ContBlock, CatchBB, IsNormal, EntryBB); + + // At this point, we are all set up, rewrite each invoke instruction. + for (unsigned i = 0, e = Invokes.size(); i != e; ++i) + rewriteExpensiveInvoke(Invokes[i], i+1, InvokeNum, StackPtr, CatchSwitch); + } + + // We know that there is at least one unwind. + + // Create three new blocks, the block to load the jmpbuf ptr and compare + // against null, the block to do the longjmp, and the error block for if it + // is null. Add them at the end of the function because they are not hot. + BasicBlock *UnwindHandler = BasicBlock::Create(F.getContext(), + "dounwind", &F); + BasicBlock *UnwindBlock = BasicBlock::Create(F.getContext(), "unwind", &F); + BasicBlock *TermBlock = BasicBlock::Create(F.getContext(), "unwinderror", &F); + + // If this function contains an invoke, restore the old jumpbuf ptr. + Value *BufPtr; + if (OldJmpBufPtr) { + // Before the return, insert a copy from the saved value to the new value. + BufPtr = new LoadInst(OldJmpBufPtr, "oldjmpbufptr", UnwindHandler); + new StoreInst(BufPtr, JBListHead, UnwindHandler); + } else { + BufPtr = new LoadInst(JBListHead, "ehlist", UnwindHandler); + } + + // Load the JBList, if it's null, then there was no catch! + Value *NotNull = new ICmpInst(*UnwindHandler, ICmpInst::ICMP_NE, BufPtr, + Constant::getNullValue(BufPtr->getType()), + "notnull"); + BranchInst::Create(UnwindBlock, TermBlock, NotNull, UnwindHandler); + + // Create the block to do the longjmp. + // Get a pointer to the jmpbuf and longjmp. + Value *Idx[] = { Constant::getNullValue(Type::getInt32Ty(F.getContext())), + ConstantInt::get(Type::getInt32Ty(F.getContext()), 0) }; + Idx[0] = GetElementPtrInst::Create(BufPtr, Idx, "JmpBuf", UnwindBlock); + Idx[0] = new BitCastInst(Idx[0], + Type::getInt8PtrTy(F.getContext()), + "tmp", UnwindBlock); + Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1); + CallInst::Create(LongJmpFn, Idx, "", UnwindBlock); + new UnreachableInst(F.getContext(), UnwindBlock); + + // Set up the term block ("throw without a catch"). + new UnreachableInst(F.getContext(), TermBlock); + + // Insert a call to abort() + CallInst::Create(AbortFn, "", + TermBlock->getTerminator())->setTailCall(); + + + // Replace all unwinds with a branch to the unwind handler. 
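+ // That is, each 'unwind' terminator simply becomes (illustrative IR):
+ //   br label %dounwind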
+ for (unsigned i = 0, e = Unwinds.size(); i != e; ++i) { + BranchInst::Create(UnwindHandler, Unwinds[i]); + Unwinds[i]->eraseFromParent(); + } + + // Replace the inserted unreachable with a branch to the unwind handler. + if (UnreachablePlaceholder) { + BranchInst::Create(UnwindHandler, UnreachablePlaceholder); + UnreachablePlaceholder->eraseFromParent(); + } + + // Finally, for any returns from this function, if this function contains an + // invoke, restore the old jmpbuf pointer to its input value. + if (OldJmpBufPtr) { + for (unsigned i = 0, e = Returns.size(); i != e; ++i) { + ReturnInst *R = Returns[i]; + + // Before the return, insert a copy from the saved value to the new value. + Value *OldBuf = new LoadInst(OldJmpBufPtr, "oldjmpbufptr", true, R); + new StoreInst(OldBuf, JBListHead, true, R); + } + } + + return true; +} + +bool LowerInvoke::runOnFunction(Function &F) { + if (useExpensiveEHSupport) + return insertExpensiveEHSupport(F); + else + return insertCheapEHSupport(F); +} diff --git a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp new file mode 100644 index 0000000..686178c --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp @@ -0,0 +1,323 @@ +//===- LowerSwitch.cpp - Eliminate Switch instructions --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The LowerSwitch transformation rewrites switch instructions with a sequence +// of branches, which allows targets to get away with not implementing the +// switch instruction until it is convenient. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/LLVMContext.h" +#include "llvm/Pass.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +using namespace llvm; + +namespace { + /// LowerSwitch Pass - Replace all SwitchInst instructions with chained branch + /// instructions. 
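+ ///
+ /// For example (an illustrative sketch), a switch such as:
+ ///   switch i32 %x, label %default [ i32 0, label %a
+ ///                                   i32 1, label %b ]
+ /// becomes a small tree of blocks, each ending in a compare and a
+ /// conditional branch, e.g. a leaf of the form:
+ ///   %cmp = icmp eq i32 %x, 0
+ ///   br i1 %cmp, label %a, label %default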
+ class LowerSwitch : public FunctionPass { + public: + static char ID; // Pass identification, replacement for typeid + LowerSwitch() : FunctionPass(ID) { + initializeLowerSwitchPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + // This is a cluster of orthogonal Transforms + AU.addPreserved<UnifyFunctionExitNodes>(); + AU.addPreserved("mem2reg"); + AU.addPreservedID(LowerInvokePassID); + } + + struct CaseRange { + Constant* Low; + Constant* High; + BasicBlock* BB; + + CaseRange(Constant *low = 0, Constant *high = 0, BasicBlock *bb = 0) : + Low(low), High(high), BB(bb) { } + }; + + typedef std::vector<CaseRange> CaseVector; + typedef std::vector<CaseRange>::iterator CaseItr; + private: + void processSwitchInst(SwitchInst *SI); + + BasicBlock* switchConvert(CaseItr Begin, CaseItr End, Value* Val, + BasicBlock* OrigBlock, BasicBlock* Default); + BasicBlock* newLeafBlock(CaseRange& Leaf, Value* Val, + BasicBlock* OrigBlock, BasicBlock* Default); + unsigned Clusterify(CaseVector& Cases, SwitchInst *SI); + }; + + /// The comparison function for sorting the switch case values in the vector. + /// WARNING: Case ranges should be disjoint! + struct CaseCmp { + bool operator () (const LowerSwitch::CaseRange& C1, + const LowerSwitch::CaseRange& C2) { + + const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low); + const ConstantInt* CI2 = cast<const ConstantInt>(C2.High); + return CI1->getValue().slt(CI2->getValue()); + } + }; +} + +char LowerSwitch::ID = 0; +INITIALIZE_PASS(LowerSwitch, "lowerswitch", + "Lower SwitchInst's to branches", false, false) + +// Publicly exposed interface to pass... +char &llvm::LowerSwitchID = LowerSwitch::ID; +// createLowerSwitchPass - Interface to this file... +FunctionPass *llvm::createLowerSwitchPass() { + return new LowerSwitch(); +} + +bool LowerSwitch::runOnFunction(Function &F) { + bool Changed = false; + + for (Function::iterator I = F.begin(), E = F.end(); I != E; ) { + BasicBlock *Cur = I++; // Advance over block so we don't traverse new blocks + + if (SwitchInst *SI = dyn_cast<SwitchInst>(Cur->getTerminator())) { + Changed = true; + processSwitchInst(SI); + } + } + + return Changed; +} + +// operator<< - Used for debugging purposes. +// +static raw_ostream& operator<<(raw_ostream &O, + const LowerSwitch::CaseVector &C) + LLVM_ATTRIBUTE_USED; +static raw_ostream& operator<<(raw_ostream &O, + const LowerSwitch::CaseVector &C) { + O << "["; + + for (LowerSwitch::CaseVector::const_iterator B = C.begin(), + E = C.end(); B != E; ) { + O << *B->Low << " -" << *B->High; + if (++B != E) O << ", "; + } + + return O << "]"; +} + +// switchConvert - Convert the switch statement into a binary lookup of +// the case values. The function recursively builds this tree. 
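+// For instance (illustrative), clustered cases [0-2], [5], [7] pick [5] as
+// the pivot: values below 5 descend into the subtree for [0-2], everything
+// else into the subtree for [5], [7].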
+// +BasicBlock* LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, + Value* Val, BasicBlock* OrigBlock, + BasicBlock* Default) +{ + unsigned Size = End - Begin; + + if (Size == 1) + return newLeafBlock(*Begin, Val, OrigBlock, Default); + + unsigned Mid = Size / 2; + std::vector<CaseRange> LHS(Begin, Begin + Mid); + DEBUG(dbgs() << "LHS: " << LHS << "\n"); + std::vector<CaseRange> RHS(Begin + Mid, End); + DEBUG(dbgs() << "RHS: " << RHS << "\n"); + + CaseRange& Pivot = *(Begin + Mid); + DEBUG(dbgs() << "Pivot ==> " + << cast<ConstantInt>(Pivot.Low)->getValue() << " -" + << cast<ConstantInt>(Pivot.High)->getValue() << "\n"); + + BasicBlock* LBranch = switchConvert(LHS.begin(), LHS.end(), Val, + OrigBlock, Default); + BasicBlock* RBranch = switchConvert(RHS.begin(), RHS.end(), Val, + OrigBlock, Default); + + // Create a new node that checks if the value is < pivot. Go to the + // left branch if it is and right branch if not. + Function* F = OrigBlock->getParent(); + BasicBlock* NewNode = BasicBlock::Create(Val->getContext(), "NodeBlock"); + Function::iterator FI = OrigBlock; + F->getBasicBlockList().insert(++FI, NewNode); + + ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_SLT, + Val, Pivot.Low, "Pivot"); + NewNode->getInstList().push_back(Comp); + BranchInst::Create(LBranch, RBranch, Comp, NewNode); + return NewNode; +} + +// newLeafBlock - Create a new leaf block for the binary lookup tree. It +// checks if the switch's value == the case's value. If not, then it +// jumps to the default branch. At this point in the tree, the value +// can't be another valid case value, so the jump to the "default" branch +// is warranted. +// +BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val, + BasicBlock* OrigBlock, + BasicBlock* Default) +{ + Function* F = OrigBlock->getParent(); + BasicBlock* NewLeaf = BasicBlock::Create(Val->getContext(), "LeafBlock"); + Function::iterator FI = OrigBlock; + F->getBasicBlockList().insert(++FI, NewLeaf); + + // Emit comparison + ICmpInst* Comp = NULL; + if (Leaf.Low == Leaf.High) { + // Make the seteq instruction... + Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_EQ, Val, + Leaf.Low, "SwitchLeaf"); + } else { + // Make range comparison + if (cast<ConstantInt>(Leaf.Low)->isMinValue(true /*isSigned*/)) { + // Val >= Min && Val <= Hi --> Val <= Hi + Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SLE, Val, Leaf.High, + "SwitchLeaf"); + } else if (cast<ConstantInt>(Leaf.Low)->isZero()) { + // Val >= 0 && Val <= Hi --> Val <=u Hi + Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Val, Leaf.High, + "SwitchLeaf"); + } else { + // Emit V-Lo <=u Hi-Lo + Constant* NegLo = ConstantExpr::getNeg(Leaf.Low); + Instruction* Add = BinaryOperator::CreateAdd(Val, NegLo, + Val->getName()+".off", + NewLeaf); + Constant *UpperBound = ConstantExpr::getAdd(NegLo, Leaf.High); + Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Add, UpperBound, + "SwitchLeaf"); + } + } + + // Make the conditional branch... + BasicBlock* Succ = Leaf.BB; + BranchInst::Create(Succ, Default, Comp, NewLeaf); + + // If there were any PHI nodes in this successor, rewrite one entry + // from OrigBlock to come from NewLeaf. 
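+ // A clustered range [Lo, Hi] originally contributed one switch successor
+ // per value, hence Hi-Lo redundant PHI entries from OrigBlock; the loop
+ // below drops the extras and retargets the surviving entry at NewLeaf.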
+ for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
+ PHINode* PN = cast<PHINode>(I);
+ // Remove all but one of the incoming entries for the cluster
+ uint64_t Range = cast<ConstantInt>(Leaf.High)->getSExtValue() -
+ cast<ConstantInt>(Leaf.Low)->getSExtValue();
+ for (uint64_t j = 0; j < Range; ++j) {
+ PN->removeIncomingValue(OrigBlock);
+ }
+
+ int BlockIdx = PN->getBasicBlockIndex(OrigBlock);
+ assert(BlockIdx != -1 && "Switch didn't go to this successor??");
+ PN->setIncomingBlock((unsigned)BlockIdx, NewLeaf);
+ }
+
+ return NewLeaf;
+}
+
+// Clusterify - Transform the simple list of Cases into a list of CaseRanges.
+unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
+ unsigned numCmps = 0;
+
+ // Start with "simple" cases
+ for (unsigned i = 1; i < SI->getNumSuccessors(); ++i)
+ Cases.push_back(CaseRange(SI->getSuccessorValue(i),
+ SI->getSuccessorValue(i),
+ SI->getSuccessor(i)));
+ std::sort(Cases.begin(), Cases.end(), CaseCmp());
+
+ // Merge cases into clusters
+ if (Cases.size()>=2)
+ for (CaseItr I=Cases.begin(), J=llvm::next(Cases.begin()); J!=Cases.end(); ) {
+ int64_t nextValue = cast<ConstantInt>(J->Low)->getSExtValue();
+ int64_t currentValue = cast<ConstantInt>(I->High)->getSExtValue();
+ BasicBlock* nextBB = J->BB;
+ BasicBlock* currentBB = I->BB;
+
+ // If the two neighboring cases go to the same destination, merge them
+ // into a single case.
+ if ((nextValue-currentValue==1) && (currentBB == nextBB)) {
+ I->High = J->High;
+ J = Cases.erase(J);
+ } else {
+ I = J++;
+ }
+ }
+
+ for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) {
+ if (I->Low != I->High)
+ // A range counts double, since it requires two compares.
+ ++numCmps;
+ }
+
+ return numCmps;
+}
+
+// processSwitchInst - Replace the specified switch instruction with a sequence
+// of chained if-then insts in a balanced binary search.
+//
+void LowerSwitch::processSwitchInst(SwitchInst *SI) {
+ BasicBlock *CurBlock = SI->getParent();
+ BasicBlock *OrigBlock = CurBlock;
+ Function *F = CurBlock->getParent();
+ Value *Val = SI->getCondition(); // The value we are switching on...
+ BasicBlock* Default = SI->getDefaultDest();
+
+ // If there is only the default destination, don't bother with the code below.
+ if (SI->getNumCases() == 1) {
+ BranchInst::Create(SI->getDefaultDest(), CurBlock);
+ CurBlock->getInstList().erase(SI);
+ return;
+ }
+
+ // Create a new, empty default block so that the new hierarchy of
+ // if-then statements goes to this and the PHI nodes are happy.
+ BasicBlock* NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault");
+ F->getBasicBlockList().insert(Default, NewDefault);
+
+ BranchInst::Create(Default, NewDefault);
+
+ // If there is an entry in any PHI nodes for the default edge, make sure
+ // to update them as well.
+ for (BasicBlock::iterator I = Default->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ int BlockIdx = PN->getBasicBlockIndex(OrigBlock);
+ assert(BlockIdx != -1 && "Switch didn't go to this successor??");
+ PN->setIncomingBlock((unsigned)BlockIdx, NewDefault);
+ }
+
+ // Prepare cases vector.
+ CaseVector Cases;
+ unsigned numCmps = Clusterify(Cases, SI);
+
+ DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
+ << ". Total compares: " << numCmps << "\n");
+ DEBUG(dbgs() << "Cases: " << Cases << "\n");
+ (void)numCmps;
+
+ BasicBlock* SwitchBlock = switchConvert(Cases.begin(), Cases.end(), Val,
+ OrigBlock, NewDefault);
+
+ // Branch to our shiny new if-then stuff...
+ BranchInst::Create(SwitchBlock, OrigBlock); + + // We are now done with the switch instruction, delete it. + CurBlock->getInstList().erase(SI); +} diff --git a/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp new file mode 100644 index 0000000..f4ca81a --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp @@ -0,0 +1,90 @@ +//===- Mem2Reg.cpp - The -mem2reg pass, a wrapper around the Utils lib ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass is a simple pass wrapper around the PromoteMemToReg function call +// exposed by the Utils library. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mem2reg" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/PromoteMemToReg.h" +#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Instructions.h" +#include "llvm/Function.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumPromoted, "Number of alloca's promoted"); + +namespace { + struct PromotePass : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + PromotePass() : FunctionPass(ID) { + initializePromotePassPass(*PassRegistry::getPassRegistry()); + } + + // runOnFunction - To run this pass, first we calculate the alloca + // instructions that are safe for promotion, then we promote each one. + // + virtual bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<DominatorTree>(); + AU.setPreservesCFG(); + // This is a cluster of orthogonal Transforms + AU.addPreserved<UnifyFunctionExitNodes>(); + AU.addPreservedID(LowerSwitchID); + AU.addPreservedID(LowerInvokePassID); + } + }; +} // end of anonymous namespace + +char PromotePass::ID = 0; +INITIALIZE_PASS_BEGIN(PromotePass, "mem2reg", "Promote Memory to Register", + false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_END(PromotePass, "mem2reg", "Promote Memory to Register", + false, false) + +bool PromotePass::runOnFunction(Function &F) { + std::vector<AllocaInst*> Allocas; + + BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function + + bool Changed = false; + + DominatorTree &DT = getAnalysis<DominatorTree>(); + + while (1) { + Allocas.clear(); + + // Find allocas that are safe to promote, by looking at all instructions in + // the entry node + for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I) + if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca? + if (isAllocaPromotable(AI)) + Allocas.push_back(AI); + + if (Allocas.empty()) break; + + PromoteMemToReg(Allocas, DT); + NumPromoted += Allocas.size(); + Changed = true; + } + + return Changed; +} + +// createPromoteMemoryToRegister - Provide an entry point to create this pass. 
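+// Typical use (an illustrative sketch):
+//   PassManager PM;
+//   PM.add(createPromoteMemoryToRegisterPass());
+//   PM.run(M);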
+// +FunctionPass *llvm::createPromoteMemoryToRegisterPass() { + return new PromotePass(); +} diff --git a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp new file mode 100644 index 0000000..db3e942 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -0,0 +1,1138 @@ +//===- PromoteMemoryToRegister.cpp - Convert allocas to registers ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file promotes memory references to be register references. It promotes +// alloca instructions which only have loads and stores as uses. An alloca is +// transformed by using iterated dominator frontiers to place PHI nodes, then +// traversing the function in depth-first order to rewrite loads and stores as +// appropriate. +// +// The algorithm used here is based on: +// +// Sreedhar and Gao. A linear time algorithm for placing phi-nodes. +// In Proceedings of the 22nd ACM SIGPLAN-SIGACT Symposium on Principles of +// Programming Languages +// POPL '95. ACM, New York, NY, 62-73. +// +// It has been modified to not explicitly use the DJ graph data structure and to +// directly compute pruned SSA using per-variable liveness information. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mem2reg" +#include "llvm/Transforms/Utils/PromoteMemToReg.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Metadata.h" +#include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Analysis/DIBuilder.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/CFG.h" +#include <algorithm> +#include <queue> +using namespace llvm; + +STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block"); +STATISTIC(NumSingleStore, "Number of alloca's promoted with a single store"); +STATISTIC(NumDeadAlloca, "Number of dead alloca's removed"); +STATISTIC(NumPHIInsert, "Number of PHI nodes inserted"); + +namespace llvm { +template<> +struct DenseMapInfo<std::pair<BasicBlock*, unsigned> > { + typedef std::pair<BasicBlock*, unsigned> EltTy; + static inline EltTy getEmptyKey() { + return EltTy(reinterpret_cast<BasicBlock*>(-1), ~0U); + } + static inline EltTy getTombstoneKey() { + return EltTy(reinterpret_cast<BasicBlock*>(-2), 0U); + } + static unsigned getHashValue(const std::pair<BasicBlock*, unsigned> &Val) { + return DenseMapInfo<void*>::getHashValue(Val.first) + Val.second*2; + } + static bool isEqual(const EltTy &LHS, const EltTy &RHS) { + return LHS == RHS; + } +}; +} + +/// isAllocaPromotable - Return true if this alloca is legal for promotion. +/// This is true if there are only loads and stores to the alloca. 
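+/// For example (illustrative IR), an alloca like this is promotable:
+///   %x = alloca i32
+///   store i32 7, i32* %x
+///   %v = load i32* %x
+/// while one whose address escapes, e.g. is passed to a call, is not.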
+/// +bool llvm::isAllocaPromotable(const AllocaInst *AI) { + // FIXME: If the memory unit is of pointer or integer type, we can permit + // assignments to subsections of the memory unit. + + // Only allow direct and non-volatile loads and stores... + for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end(); + UI != UE; ++UI) { // Loop over all of the uses of the alloca + const User *U = *UI; + if (const LoadInst *LI = dyn_cast<LoadInst>(U)) { + // Note that atomic loads can be transformed; atomic semantics do + // not have any meaning for a local alloca. + if (LI->isVolatile()) + return false; + } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) { + if (SI->getOperand(0) == AI) + return false; // Don't allow a store OF the AI, only INTO the AI. + // Note that atomic stores can be transformed; atomic semantics do + // not have any meaning for a local alloca. + if (SI->isVolatile()) + return false; + } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) { + if (II->getIntrinsicID() != Intrinsic::lifetime_start && + II->getIntrinsicID() != Intrinsic::lifetime_end) + return false; + } else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { + if (BCI->getType() != Type::getInt8PtrTy(U->getContext())) + return false; + if (!onlyUsedByLifetimeMarkers(BCI)) + return false; + } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) { + if (GEPI->getType() != Type::getInt8PtrTy(U->getContext())) + return false; + if (!GEPI->hasAllZeroIndices()) + return false; + if (!onlyUsedByLifetimeMarkers(GEPI)) + return false; + } else { + return false; + } + } + + return true; +} + +namespace { + struct AllocaInfo; + + // Data package used by RenamePass() + class RenamePassData { + public: + typedef std::vector<Value *> ValVector; + + RenamePassData() : BB(NULL), Pred(NULL), Values() {} + RenamePassData(BasicBlock *B, BasicBlock *P, + const ValVector &V) : BB(B), Pred(P), Values(V) {} + BasicBlock *BB; + BasicBlock *Pred; + ValVector Values; + + void swap(RenamePassData &RHS) { + std::swap(BB, RHS.BB); + std::swap(Pred, RHS.Pred); + Values.swap(RHS.Values); + } + }; + + /// LargeBlockInfo - This assigns and keeps a per-bb relative ordering of + /// load/store instructions in the block that directly load or store an alloca. + /// + /// This functionality is important because it avoids scanning large basic + /// blocks multiple times when promoting many allocas in the same block. + class LargeBlockInfo { + /// InstNumbers - For each instruction that we track, keep the index of the + /// instruction. The index starts out as the number of the instruction from + /// the start of the block. + DenseMap<const Instruction *, unsigned> InstNumbers; + public: + + /// isInterestingInstruction - This code only looks at accesses to allocas. + static bool isInterestingInstruction(const Instruction *I) { + return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) || + (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1))); + } + + /// getInstructionIndex - Get or calculate the index of the specified + /// instruction. + unsigned getInstructionIndex(const Instruction *I) { + assert(isInterestingInstruction(I) && + "Not a load/store to/from an alloca?"); + + // If we already have this instruction number, return it. + DenseMap<const Instruction *, unsigned>::iterator It = InstNumbers.find(I); + if (It != InstNumbers.end()) return It->second; + + // Scan the whole block to get the instruction. 
This accumulates
+ // information for every interesting instruction in the block, in order to
+ // avoid gratuitous rescans.
+ const BasicBlock *BB = I->getParent();
+ unsigned InstNo = 0;
+ for (BasicBlock::const_iterator BBI = BB->begin(), E = BB->end();
+ BBI != E; ++BBI)
+ if (isInterestingInstruction(BBI))
+ InstNumbers[BBI] = InstNo++;
+ It = InstNumbers.find(I);
+
+ assert(It != InstNumbers.end() && "Didn't insert instruction?");
+ return It->second;
+ }
+
+ void deleteValue(const Instruction *I) {
+ InstNumbers.erase(I);
+ }
+
+ void clear() {
+ InstNumbers.clear();
+ }
+ };
+
+ struct PromoteMem2Reg {
+ /// Allocas - The alloca instructions being promoted.
+ ///
+ std::vector<AllocaInst*> Allocas;
+ DominatorTree &DT;
+ DIBuilder *DIB;
+
+ /// AST - An AliasSetTracker object to update. If null, don't update it.
+ ///
+ AliasSetTracker *AST;
+
+ /// AllocaLookup - Reverse mapping of Allocas.
+ ///
+ DenseMap<AllocaInst*, unsigned> AllocaLookup;
+
+ /// NewPhiNodes - The PhiNodes we're adding.
+ ///
+ DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*> NewPhiNodes;
+
+ /// PhiToAllocaMap - For each PHI node, keep track of which entry in Allocas
+ /// it corresponds to.
+ DenseMap<PHINode*, unsigned> PhiToAllocaMap;
+
+ /// PointerAllocaValues - If we are updating an AliasSetTracker, then for
+ /// each alloca that is of pointer type, we keep track of what to copyValue
+ /// to the inserted PHI nodes here.
+ ///
+ std::vector<Value*> PointerAllocaValues;
+
+ /// AllocaDbgDeclares - For each alloca, we keep track of the dbg.declare
+ /// intrinsic that describes it, if any, so that we can convert it to a
+ /// dbg.value intrinsic if the alloca gets promoted.
+ SmallVector<DbgDeclareInst*, 8> AllocaDbgDeclares;
+
+ /// Visited - The set of basic blocks the renamer has already visited.
+ ///
+ SmallPtrSet<BasicBlock*, 16> Visited;
+
+ /// BBNumbers - Contains a stable numbering of basic blocks to avoid
+ /// non-deterministic behavior.
+ DenseMap<BasicBlock*, unsigned> BBNumbers;
+
+ /// DomLevels - Maps DomTreeNodes to their level in the dominator tree.
+ DenseMap<DomTreeNode*, unsigned> DomLevels;
+
+ /// BBNumPreds - Lazily compute the number of predecessors a block has.
+ DenseMap<const BasicBlock*, unsigned> BBNumPreds;
+ public:
+ PromoteMem2Reg(const std::vector<AllocaInst*> &A, DominatorTree &dt,
+ AliasSetTracker *ast)
+ : Allocas(A), DT(dt), DIB(0), AST(ast) {}
+ ~PromoteMem2Reg() {
+ delete DIB;
+ }
+
+ void run();
+
+ /// dominates - Return true if BB1 dominates BB2 using the DominatorTree.
+ /// + bool dominates(BasicBlock *BB1, BasicBlock *BB2) const { + return DT.dominates(BB1, BB2); + } + + private: + void RemoveFromAllocasList(unsigned &AllocaIdx) { + Allocas[AllocaIdx] = Allocas.back(); + Allocas.pop_back(); + --AllocaIdx; + } + + unsigned getNumPreds(const BasicBlock *BB) { + unsigned &NP = BBNumPreds[BB]; + if (NP == 0) + NP = std::distance(pred_begin(BB), pred_end(BB))+1; + return NP-1; + } + + void DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum, + AllocaInfo &Info); + void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info, + const SmallPtrSet<BasicBlock*, 32> &DefBlocks, + SmallPtrSet<BasicBlock*, 32> &LiveInBlocks); + + void RewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, + LargeBlockInfo &LBI); + void PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, + LargeBlockInfo &LBI); + + void RenamePass(BasicBlock *BB, BasicBlock *Pred, + RenamePassData::ValVector &IncVals, + std::vector<RenamePassData> &Worklist); + bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version); + }; + + struct AllocaInfo { + SmallVector<BasicBlock*, 32> DefiningBlocks; + SmallVector<BasicBlock*, 32> UsingBlocks; + + StoreInst *OnlyStore; + BasicBlock *OnlyBlock; + bool OnlyUsedInOneBlock; + + Value *AllocaPointerVal; + DbgDeclareInst *DbgDeclare; + + void clear() { + DefiningBlocks.clear(); + UsingBlocks.clear(); + OnlyStore = 0; + OnlyBlock = 0; + OnlyUsedInOneBlock = true; + AllocaPointerVal = 0; + DbgDeclare = 0; + } + + /// AnalyzeAlloca - Scan the uses of the specified alloca, filling in our + /// ivars. + void AnalyzeAlloca(AllocaInst *AI) { + clear(); + + // As we scan the uses of the alloca instruction, keep track of stores, + // and decide whether all of the loads and stores to the alloca are within + // the same basic block. + for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); + UI != E;) { + Instruction *User = cast<Instruction>(*UI++); + + if (StoreInst *SI = dyn_cast<StoreInst>(User)) { + // Remember the basic blocks which define new values for the alloca + DefiningBlocks.push_back(SI->getParent()); + AllocaPointerVal = SI->getOperand(0); + OnlyStore = SI; + } else { + LoadInst *LI = cast<LoadInst>(User); + // Otherwise it must be a load instruction, keep track of variable + // reads. + UsingBlocks.push_back(LI->getParent()); + AllocaPointerVal = LI; + } + + if (OnlyUsedInOneBlock) { + if (OnlyBlock == 0) + OnlyBlock = User->getParent(); + else if (OnlyBlock != User->getParent()) + OnlyUsedInOneBlock = false; + } + } + + DbgDeclare = FindAllocaDbgDeclare(AI); + } + }; + + typedef std::pair<DomTreeNode*, unsigned> DomTreeNodePair; + + struct DomTreeNodeCompare { + bool operator()(const DomTreeNodePair &LHS, const DomTreeNodePair &RHS) { + return LHS.second < RHS.second; + } + }; +} // end of anonymous namespace + +static void removeLifetimeIntrinsicUsers(AllocaInst *AI) { + // Knowing that this alloca is promotable, we know that it's safe to kill all + // instructions except for load and store. + + for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end(); + UI != UE;) { + Instruction *I = cast<Instruction>(*UI); + ++UI; + if (isa<LoadInst>(I) || isa<StoreInst>(I)) + continue; + + if (!I->getType()->isVoidTy()) { + // The only users of this bitcast/GEP instruction are lifetime intrinsics. + // Follow the use/def chain to erase them now instead of leaving it for + // dead code elimination later. 
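+ // For example (illustrative IR):
+ //   %p = bitcast i32* %a to i8*
+ //   call void @llvm.lifetime.start(i64 4, i8* %p)
+ // the intrinsic call is erased here, and %p itself just below.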
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); + UI != UE;) { + Instruction *Inst = cast<Instruction>(*UI); + ++UI; + Inst->eraseFromParent(); + } + } + I->eraseFromParent(); + } +} + +void PromoteMem2Reg::run() { + Function &F = *DT.getRoot()->getParent(); + + if (AST) PointerAllocaValues.resize(Allocas.size()); + AllocaDbgDeclares.resize(Allocas.size()); + + AllocaInfo Info; + LargeBlockInfo LBI; + + for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) { + AllocaInst *AI = Allocas[AllocaNum]; + + assert(isAllocaPromotable(AI) && + "Cannot promote non-promotable alloca!"); + assert(AI->getParent()->getParent() == &F && + "All allocas should be in the same function, which is same as DF!"); + + removeLifetimeIntrinsicUsers(AI); + + if (AI->use_empty()) { + // If there are no uses of the alloca, just delete it now. + if (AST) AST->deleteValue(AI); + AI->eraseFromParent(); + + // Remove the alloca from the Allocas list, since it has been processed + RemoveFromAllocasList(AllocaNum); + ++NumDeadAlloca; + continue; + } + + // Calculate the set of read and write-locations for each alloca. This is + // analogous to finding the 'uses' and 'definitions' of each variable. + Info.AnalyzeAlloca(AI); + + // If there is only a single store to this value, replace any loads of + // it that are directly dominated by the definition with the value stored. + if (Info.DefiningBlocks.size() == 1) { + RewriteSingleStoreAlloca(AI, Info, LBI); + + // Finally, after the scan, check to see if the store is all that is left. + if (Info.UsingBlocks.empty()) { + // Record debuginfo for the store and remove the declaration's debuginfo. + if (DbgDeclareInst *DDI = Info.DbgDeclare) { + if (!DIB) + DIB = new DIBuilder(*DDI->getParent()->getParent()->getParent()); + ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, *DIB); + DDI->eraseFromParent(); + } + // Remove the (now dead) store and alloca. + Info.OnlyStore->eraseFromParent(); + LBI.deleteValue(Info.OnlyStore); + + if (AST) AST->deleteValue(AI); + AI->eraseFromParent(); + LBI.deleteValue(AI); + + // The alloca has been processed, move on. + RemoveFromAllocasList(AllocaNum); + + ++NumSingleStore; + continue; + } + } + + // If the alloca is only read and written in one basic block, just perform a + // linear sweep over the block to eliminate it. + if (Info.OnlyUsedInOneBlock) { + PromoteSingleBlockAlloca(AI, Info, LBI); + + // Finally, after the scan, check to see if the stores are all that is + // left. + if (Info.UsingBlocks.empty()) { + + // Remove the (now dead) stores and alloca. + while (!AI->use_empty()) { + StoreInst *SI = cast<StoreInst>(AI->use_back()); + // Record debuginfo for the store before removing it. + if (DbgDeclareInst *DDI = Info.DbgDeclare) { + if (!DIB) + DIB = new DIBuilder(*SI->getParent()->getParent()->getParent()); + ConvertDebugDeclareToDebugValue(DDI, SI, *DIB); + } + SI->eraseFromParent(); + LBI.deleteValue(SI); + } + + if (AST) AST->deleteValue(AI); + AI->eraseFromParent(); + LBI.deleteValue(AI); + + // The alloca has been processed, move on. + RemoveFromAllocasList(AllocaNum); + + // The alloca's debuginfo can be removed as well. + if (DbgDeclareInst *DDI = Info.DbgDeclare) + DDI->eraseFromParent(); + + ++NumLocalPromoted; + continue; + } + } + + // If we haven't computed dominator tree levels, do so now. 
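+ // DomLevels[N] is N's depth in the dominator tree: the root gets level 0,
+ // the blocks it immediately dominates level 1, and so on.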
+ if (DomLevels.empty()) {
+ SmallVector<DomTreeNode*, 32> Worklist;
+
+ DomTreeNode *Root = DT.getRootNode();
+ DomLevels[Root] = 0;
+ Worklist.push_back(Root);
+
+ while (!Worklist.empty()) {
+ DomTreeNode *Node = Worklist.pop_back_val();
+ unsigned ChildLevel = DomLevels[Node] + 1;
+ for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end();
+ CI != CE; ++CI) {
+ DomLevels[*CI] = ChildLevel;
+ Worklist.push_back(*CI);
+ }
+ }
+ }
+
+ // If we haven't computed a numbering for the BB's in the function, do so
+ // now.
+ if (BBNumbers.empty()) {
+ unsigned ID = 0;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+ BBNumbers[I] = ID++;
+ }
+
+ // If we have an AST to keep updated, remember some pointer value that is
+ // stored into the alloca.
+ if (AST)
+ PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal;
+
+ // Remember the dbg.declare intrinsic describing this alloca, if any.
+ if (Info.DbgDeclare) AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare;
+
+ // Keep the reverse mapping of the 'Allocas' array for the rename pass.
+ AllocaLookup[Allocas[AllocaNum]] = AllocaNum;
+
+ // At this point, we're committed to promoting the alloca using IDF's, and
+ // the standard SSA construction algorithm. Determine which blocks need PHI
+ // nodes and see if we can optimize out some work by avoiding insertion of
+ // dead phi nodes.
+ DetermineInsertionPoint(AI, AllocaNum, Info);
+ }
+
+ if (Allocas.empty())
+ return; // All of the allocas must have been trivial!
+
+ LBI.clear();
+
+
+ // Set the incoming values for the basic block to be undef values for all of
+ // the allocas. We do this in case there is a load of a value that has not
+ // been stored yet. In this case, it will get this undef value.
+ //
+ RenamePassData::ValVector Values(Allocas.size());
+ for (unsigned i = 0, e = Allocas.size(); i != e; ++i)
+ Values[i] = UndefValue::get(Allocas[i]->getAllocatedType());
+
+ // Walk all basic blocks in the function, performing the SSA rename algorithm
+ // and inserting the phi nodes we marked as necessary.
+ //
+ std::vector<RenamePassData> RenamePassWorkList;
+ RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values));
+ do {
+ RenamePassData RPD;
+ RPD.swap(RenamePassWorkList.back());
+ RenamePassWorkList.pop_back();
+ // RenamePass may add new worklist entries.
+ RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList);
+ } while (!RenamePassWorkList.empty());
+
+ // The renamer uses the Visited set to avoid infinite loops. Clear it now.
+ Visited.clear();
+
+ // Remove the allocas themselves from the function.
+ for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
+ Instruction *A = Allocas[i];
+
+ // If there are any uses of the alloca instructions left, they must be in
+ // unreachable basic blocks that were not processed by walking the dominator
+ // tree. Just delete the users now.
+ if (!A->use_empty())
+ A->replaceAllUsesWith(UndefValue::get(A->getType()));
+ if (AST) AST->deleteValue(A);
+ A->eraseFromParent();
+ }
+
+ // Remove the allocas' dbg.declare intrinsics from the function.
+ for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i)
+ if (DbgDeclareInst *DDI = AllocaDbgDeclares[i])
+ DDI->eraseFromParent();
+
+ // Loop over all of the PHI nodes and see if there are any that we can get
+ // rid of because they merge all of the same incoming values. This can
+ // happen due to undef values coming into the PHI nodes. This process is
+ // iterative, because eliminating one PHI node can cause others to be removed.
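+ // For example (illustrative IR):
+ //   %p = phi i32 [ %v, %a ], [ undef, %b ]
+ // simplifies to %v, which may in turn make a phi that consumed %p
+ // trivial as well, hence the outer loop.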
+ bool EliminatedAPHI = true;
+ while (EliminatedAPHI) {
+ EliminatedAPHI = false;
+
+ for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I =
+ NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) {
+ PHINode *PN = I->second;
+
+ // If this PHI node merges one value and/or undefs, get the value.
+ if (Value *V = SimplifyInstruction(PN, 0, &DT)) {
+ if (AST && PN->getType()->isPointerTy())
+ AST->deleteValue(PN);
+ PN->replaceAllUsesWith(V);
+ PN->eraseFromParent();
+ NewPhiNodes.erase(I++);
+ EliminatedAPHI = true;
+ continue;
+ }
+ ++I;
+ }
+ }
+
+ // At this point, the renamer has added entries to PHI nodes for all reachable
+ // code. Unfortunately, there may be unreachable blocks which the renamer
+ // hasn't traversed. If this is the case, the PHI nodes may not
+ // have incoming values for all predecessors. Loop over all PHI nodes we have
+ // created, inserting undef values if they are missing any incoming values.
+ //
+ for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I =
+ NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E; ++I) {
+ // We want to do this once per basic block. As such, only process a block
+ // when we find the PHI that is the first entry in the block.
+ PHINode *SomePHI = I->second;
+ BasicBlock *BB = SomePHI->getParent();
+ if (&BB->front() != SomePHI)
+ continue;
+
+ // Only do work here if the PHI nodes are missing incoming values. We
+ // know that all PHI nodes that were inserted in a block will have the same
+ // number of incoming values, so we can just check any of them.
+ if (SomePHI->getNumIncomingValues() == getNumPreds(BB))
+ continue;
+
+ // Get the preds for BB.
+ SmallVector<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB));
+
+ // Ok, now we know that all of the PHI nodes are missing entries for some
+ // basic blocks. Start by sorting the incoming predecessors for efficient
+ // access.
+ std::sort(Preds.begin(), Preds.end());
+
+ // Now we loop through all BB's which have entries in SomePHI and remove
+ // them from the Preds list.
+ for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
+ // Do a log(n) search of the Preds list for the entry we want.
+ SmallVector<BasicBlock*, 16>::iterator EntIt =
+ std::lower_bound(Preds.begin(), Preds.end(),
+ SomePHI->getIncomingBlock(i));
+ assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i)&&
+ "PHI node has entry for a block which is not a predecessor!");
+
+ // Remove the entry
+ Preds.erase(EntIt);
+ }
+
+ // At this point, the blocks left in the preds list must have dummy
+ // entries inserted into every PHI node for the block. Update all the phi
+ // nodes in this block that we are inserting (there could be phis before
+ // mem2reg runs).
+ unsigned NumBadPreds = SomePHI->getNumIncomingValues();
+ BasicBlock::iterator BBI = BB->begin();
+ while ((SomePHI = dyn_cast<PHINode>(BBI++)) &&
+ SomePHI->getNumIncomingValues() == NumBadPreds) {
+ Value *UndefVal = UndefValue::get(SomePHI->getType());
+ for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred)
+ SomePHI->addIncoming(UndefVal, Preds[pred]);
+ }
+ }
+
+ NewPhiNodes.clear();
+}
+
+
+/// ComputeLiveInBlocks - Determine which blocks the value is live in. These
+/// are blocks which lead to uses. Knowing this allows us to avoid inserting
+/// PHI nodes into blocks which don't lead to uses (thus, the inserted phi nodes
+/// would be dead).
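+/// For example (illustrative), if the alloca is stored in %entry and only
+/// loaded in %exit, then only blocks on paths from %entry to %exit are
+/// live-in; a side branch that never reaches %exit needs no PHI for it.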
+void PromoteMem2Reg::
+ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
+                    const SmallPtrSet<BasicBlock*, 32> &DefBlocks,
+                    SmallPtrSet<BasicBlock*, 32> &LiveInBlocks) {
+
+  // To determine liveness, we must iterate through the predecessors of blocks
+  // where the def is live.  Blocks are added to the worklist if we need to
+  // check their predecessors.  Start with all the using blocks.
+  SmallVector<BasicBlock*, 64> LiveInBlockWorklist(Info.UsingBlocks.begin(),
+                                                   Info.UsingBlocks.end());
+
+  // If any of the using blocks is also a definition block, check to see if the
+  // definition occurs before or after the use.  If it happens before the use,
+  // the value isn't really live-in.
+  for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) {
+    BasicBlock *BB = LiveInBlockWorklist[i];
+    if (!DefBlocks.count(BB)) continue;
+
+    // Okay, this is a block that both uses and defines the value.  If the
+    // first reference to the alloca is a def (store), then we know it isn't
+    // live-in.
+    for (BasicBlock::iterator I = BB->begin(); ; ++I) {
+      if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+        if (SI->getOperand(1) != AI) continue;
+
+        // We found a store to the alloca before a load.  The alloca is not
+        // actually live-in here.
+        LiveInBlockWorklist[i] = LiveInBlockWorklist.back();
+        LiveInBlockWorklist.pop_back();
+        --i, --e;
+        break;
+      }
+
+      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+        if (LI->getOperand(0) != AI) continue;
+
+        // Okay, we found a load before a store to the alloca.  It is actually
+        // live into this block.
+        break;
+      }
+    }
+  }
+
+  // Now that we have a set of blocks where the phi is live-in, recursively add
+  // their predecessors until we find the full region where the value is live.
+  while (!LiveInBlockWorklist.empty()) {
+    BasicBlock *BB = LiveInBlockWorklist.pop_back_val();
+
+    // The block really is live in here, insert it into the set.  If already in
+    // the set, then it has already been processed.
+    if (!LiveInBlocks.insert(BB))
+      continue;
+
+    // Since the value is live into BB, it is either defined in a predecessor
+    // or live into it too.  Add the preds to the worklist unless they are a
+    // defining block.
+    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+      BasicBlock *P = *PI;
+
+      // The value is not live into a predecessor that defines the value.
+      if (DefBlocks.count(P))
+        continue;
+
+      // Otherwise it is, add to the worklist.
+      LiveInBlockWorklist.push_back(P);
+    }
+  }
+}
+
+/// DetermineInsertionPoint - At this point, we're committed to promoting the
+/// alloca using IDF's, and the standard SSA construction algorithm.  Determine
+/// which blocks need phi nodes and see if we can optimize out some work by
+/// avoiding insertion of dead phi nodes.
+void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
+                                             AllocaInfo &Info) {
+  // Unique the set of defining blocks for efficient lookup.
+  SmallPtrSet<BasicBlock*, 32> DefBlocks;
+  DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end());
+
+  // Determine which blocks the value is live in.  These are blocks which lead
+  // to uses.
+  SmallPtrSet<BasicBlock*, 32> LiveInBlocks;
+  ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);
+
+  // Use a priority queue keyed on dominator tree level so that inserted nodes
+  // are handled from the bottom of the dominator tree upwards.
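+  //
+  // As a small illustration (hypothetical CFG): for a diamond A -> {B, C} -> D
+  // the dominator tree is A -> {B, C, D}, giving levels A:0, B:1, C:1, D:1.
+  // A store in B pushes (B, 1); walking B's CFG successors finds B -> D, whose
+  // level (1) does not exceed the root's, so D joins the iterated dominance
+  // frontier and gets a PHI, provided the value is live into D.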
+ typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>, + DomTreeNodeCompare> IDFPriorityQueue; + IDFPriorityQueue PQ; + + for (SmallPtrSet<BasicBlock*, 32>::const_iterator I = DefBlocks.begin(), + E = DefBlocks.end(); I != E; ++I) { + if (DomTreeNode *Node = DT.getNode(*I)) + PQ.push(std::make_pair(Node, DomLevels[Node])); + } + + SmallVector<std::pair<unsigned, BasicBlock*>, 32> DFBlocks; + SmallPtrSet<DomTreeNode*, 32> Visited; + SmallVector<DomTreeNode*, 32> Worklist; + while (!PQ.empty()) { + DomTreeNodePair RootPair = PQ.top(); + PQ.pop(); + DomTreeNode *Root = RootPair.first; + unsigned RootLevel = RootPair.second; + + // Walk all dominator tree children of Root, inspecting their CFG edges with + // targets elsewhere on the dominator tree. Only targets whose level is at + // most Root's level are added to the iterated dominance frontier of the + // definition set. + + Worklist.clear(); + Worklist.push_back(Root); + + while (!Worklist.empty()) { + DomTreeNode *Node = Worklist.pop_back_val(); + BasicBlock *BB = Node->getBlock(); + + for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; + ++SI) { + DomTreeNode *SuccNode = DT.getNode(*SI); + + // Quickly skip all CFG edges that are also dominator tree edges instead + // of catching them below. + if (SuccNode->getIDom() == Node) + continue; + + unsigned SuccLevel = DomLevels[SuccNode]; + if (SuccLevel > RootLevel) + continue; + + if (!Visited.insert(SuccNode)) + continue; + + BasicBlock *SuccBB = SuccNode->getBlock(); + if (!LiveInBlocks.count(SuccBB)) + continue; + + DFBlocks.push_back(std::make_pair(BBNumbers[SuccBB], SuccBB)); + if (!DefBlocks.count(SuccBB)) + PQ.push(std::make_pair(SuccNode, SuccLevel)); + } + + for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); CI != CE; + ++CI) { + if (!Visited.count(*CI)) + Worklist.push_back(*CI); + } + } + } + + if (DFBlocks.size() > 1) + std::sort(DFBlocks.begin(), DFBlocks.end()); + + unsigned CurrentVersion = 0; + for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i) + QueuePhiNode(DFBlocks[i].second, AllocaNum, CurrentVersion); +} + +/// RewriteSingleStoreAlloca - If there is only a single store to this value, +/// replace any loads of it that are directly dominated by the definition with +/// the value stored. +void PromoteMem2Reg::RewriteSingleStoreAlloca(AllocaInst *AI, + AllocaInfo &Info, + LargeBlockInfo &LBI) { + StoreInst *OnlyStore = Info.OnlyStore; + bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0)); + BasicBlock *StoreBB = OnlyStore->getParent(); + int StoreIndex = -1; + + // Clear out UsingBlocks. We will reconstruct it here if needed. + Info.UsingBlocks.clear(); + + for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; ) { + Instruction *UserInst = cast<Instruction>(*UI++); + if (!isa<LoadInst>(UserInst)) { + assert(UserInst == OnlyStore && "Should only have load/stores"); + continue; + } + LoadInst *LI = cast<LoadInst>(UserInst); + + // Okay, if we have a load from the alloca, we want to replace it with the + // only value stored to the alloca. We can do this if the value is + // dominated by the store. If not, we use the rest of the mem2reg machinery + // to insert the phi nodes as needed. + if (!StoringGlobalVal) { // Non-instructions are always dominated. + if (LI->getParent() == StoreBB) { + // If we have a use that is in the same block as the store, compare the + // indices of the two instructions to see which one came first. 
If the
+        // load came before the store, we can't handle it.
+        if (StoreIndex == -1)
+          StoreIndex = LBI.getInstructionIndex(OnlyStore);
+
+        if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) {
+          // Can't handle this load, bail out.
+          Info.UsingBlocks.push_back(StoreBB);
+          continue;
+        }
+
+      } else if (LI->getParent() != StoreBB &&
+                 !dominates(StoreBB, LI->getParent())) {
+        // If the load and store are in different blocks, use BB dominance to
+        // check their relationships.  If the store doesn't dom the use, bail
+        // out.
+        Info.UsingBlocks.push_back(LI->getParent());
+        continue;
+      }
+    }
+
+    // Otherwise, we *can* safely rewrite this load.
+    Value *ReplVal = OnlyStore->getOperand(0);
+    // If the replacement value is the load, this must occur in unreachable
+    // code.
+    if (ReplVal == LI)
+      ReplVal = UndefValue::get(LI->getType());
+    LI->replaceAllUsesWith(ReplVal);
+    if (AST && LI->getType()->isPointerTy())
+      AST->deleteValue(LI);
+    LI->eraseFromParent();
+    LBI.deleteValue(LI);
+  }
+}
+
+namespace {
+
+/// StoreIndexSearchPredicate - This is a helper predicate used to search by
+/// the first element of a pair.
+struct StoreIndexSearchPredicate {
+  bool operator()(const std::pair<unsigned, StoreInst*> &LHS,
+                  const std::pair<unsigned, StoreInst*> &RHS) {
+    return LHS.first < RHS.first;
+  }
+};
+
+}
+
+/// PromoteSingleBlockAlloca - Many allocas are only used within a single basic
+/// block.  If this is the case, avoid traversing the CFG and inserting a lot
+/// of potentially useless PHI nodes by just performing a single linear pass
+/// over the basic block using the Alloca.
+///
+/// If we cannot promote this alloca (because it is read before it is written),
+/// the blocks with unhandled uses are left in Info.UsingBlocks so the general
+/// mem2reg machinery can insert PHI nodes for them.  This is necessary in
+/// cases where, due to control flow, the alloca is potentially undefined on
+/// some control flow paths.  e.g. code like this is potentially correct:
+///
+///   for (...) { if (c) { A = undef; undef = B; } }
+///
+/// ... so long as A is not used before undef is set.
+///
+void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
+                                              LargeBlockInfo &LBI) {
+  // The trickiest case to handle is when we have large blocks.  Because of
+  // this, this code is optimized assuming that large blocks happen.  This does
+  // not significantly pessimize the small block case.  This uses
+  // LargeBlockInfo to make it efficient to get the index of various operations
+  // in the block.
+
+  // Clear out UsingBlocks.  We will reconstruct it here if needed.
+  Info.UsingBlocks.clear();
+
+  // Walk the use-def list of the alloca, getting the locations of all stores.
+  typedef SmallVector<std::pair<unsigned, StoreInst*>, 64> StoresByIndexTy;
+  StoresByIndexTy StoresByIndex;
+
+  for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
+       UI != E; ++UI)
+    if (StoreInst *SI = dyn_cast<StoreInst>(*UI))
+      StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI));
+
+  // If there are no stores to the alloca, just replace any loads with undef.
+  if (StoresByIndex.empty()) {
+    for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;)
+      if (LoadInst *LI = dyn_cast<LoadInst>(*UI++)) {
+        LI->replaceAllUsesWith(UndefValue::get(LI->getType()));
+        if (AST && LI->getType()->isPointerTy())
+          AST->deleteValue(LI);
+        LBI.deleteValue(LI);
+        LI->eraseFromParent();
+      }
+    return;
+  }
+
+  // Sort the stores by their index, making it efficient to do a lookup with a
+  // binary search.
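+  //
+  // As a small illustration (hypothetical instruction indices): for a block
+  // containing
+  //   store i32 1, i32* %a     ; index 2
+  //   %x = load i32* %a        ; index 3  --> rewritten to 1
+  //   store i32 5, i32* %a     ; index 7
+  //   %y = load i32* %a        ; index 9  --> rewritten to 5
+  // the sorted store list is [(2, store 1), (7, store 5)], and each load is
+  // resolved by the lower_bound query below on its own index.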
+  std::sort(StoresByIndex.begin(), StoresByIndex.end());
+
+  // Walk all of the loads from this alloca, replacing them with the nearest
+  // store above them, if any.
+  for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) {
+    LoadInst *LI = dyn_cast<LoadInst>(*UI++);
+    if (!LI) continue;
+
+    unsigned LoadIdx = LBI.getInstructionIndex(LI);
+
+    // Find the nearest store that has a lower index than this load.
+    StoresByIndexTy::iterator I =
+      std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(),
+                       std::pair<unsigned, StoreInst*>(LoadIdx,
+                                                  static_cast<StoreInst*>(0)),
+                       StoreIndexSearchPredicate());
+
+    // If there is no store before this load, then we can't promote this load.
+    if (I == StoresByIndex.begin()) {
+      // Can't handle this load, bail out.
+      Info.UsingBlocks.push_back(LI->getParent());
+      continue;
+    }
+
+    // Otherwise, there was a store before this load, so the load takes its
+    // value.
+    --I;
+    LI->replaceAllUsesWith(I->second->getOperand(0));
+    if (AST && LI->getType()->isPointerTy())
+      AST->deleteValue(LI);
+    LI->eraseFromParent();
+    LBI.deleteValue(LI);
+  }
+}
+
+// QueuePhiNode - Queues a phi-node to be added to a basic-block for a specific
+// Alloca.  Returns true if there wasn't already a phi-node for that variable.
+//
+bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
+                                  unsigned &Version) {
+  // Look up the basic-block in question.
+  PHINode *&PN = NewPhiNodes[std::make_pair(BB, AllocaNo)];
+
+  // If the BB already has a phi node added for the i'th alloca then we're
+  // done!
+  if (PN) return false;
+
+  // Create a PhiNode using the dereferenced type... and add the phi-node to
+  // the BasicBlock.
+  PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB),
+                       Allocas[AllocaNo]->getName() + "." + Twine(Version++),
+                       BB->begin());
+  ++NumPHIInsert;
+  PhiToAllocaMap[PN] = AllocaNo;
+
+  if (AST && PN->getType()->isPointerTy())
+    AST->copyValue(PointerAllocaValues[AllocaNo], PN);
+
+  return true;
+}
+
+// RenamePass - Recursively traverse the CFG of the function, renaming loads
+// and stores to the allocas which we are promoting.  IncomingVals indicates
+// what value each Alloca contains on exit from the predecessor block Pred.
+//
+void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,
+                                RenamePassData::ValVector &IncomingVals,
+                                std::vector<RenamePassData> &Worklist) {
+NextIteration:
+  // If we are inserting any phi nodes into this BB, they will already be in
+  // the block.
+  if (PHINode *APN = dyn_cast<PHINode>(BB->begin())) {
+    // If we have PHI nodes to update, compute the number of edges from Pred to
+    // BB.
+    if (PhiToAllocaMap.count(APN)) {
+      // We want to be able to distinguish the PHI nodes being inserted by this
+      // invocation of mem2reg from those phi nodes that already existed in the
+      // IR before mem2reg was run.  We determine that APN is being inserted
+      // because it is missing incoming edges.  All other PHI nodes being
+      // inserted by this pass of mem2reg will have the same number of incoming
+      // operands so far.  Remember this count.
+      unsigned NewPHINumOperands = APN->getNumOperands();
+
+      unsigned NumEdges = 0;
+      for (succ_iterator I = succ_begin(Pred), E = succ_end(Pred); I != E; ++I)
+        if (*I == BB)
+          ++NumEdges;
+      assert(NumEdges && "Must be at least one edge from Pred to BB!");
+
+      // Add entries for all the phis.
+      BasicBlock::iterator PNI = BB->begin();
+      do {
+        unsigned AllocaNo = PhiToAllocaMap[APN];
+
+        // Add N incoming values to the PHI node.
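+        // (For instance, a switch in Pred with two case values that both
+        // target BB yields NumEdges == 2, and the PHI needs one entry per
+        // edge.)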
+ for (unsigned i = 0; i != NumEdges; ++i) + APN->addIncoming(IncomingVals[AllocaNo], Pred); + + // The currently active variable for this block is now the PHI. + IncomingVals[AllocaNo] = APN; + + // Get the next phi node. + ++PNI; + APN = dyn_cast<PHINode>(PNI); + if (APN == 0) break; + + // Verify that it is missing entries. If not, it is not being inserted + // by this mem2reg invocation so we want to ignore it. + } while (APN->getNumOperands() == NewPHINumOperands); + } + } + + // Don't revisit blocks. + if (!Visited.insert(BB)) return; + + for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II); ) { + Instruction *I = II++; // get the instruction, increment iterator + + if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand()); + if (!Src) continue; + + DenseMap<AllocaInst*, unsigned>::iterator AI = AllocaLookup.find(Src); + if (AI == AllocaLookup.end()) continue; + + Value *V = IncomingVals[AI->second]; + + // Anything using the load now uses the current value. + LI->replaceAllUsesWith(V); + if (AST && LI->getType()->isPointerTy()) + AST->deleteValue(LI); + BB->getInstList().erase(LI); + } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { + // Delete this instruction and mark the name as the current holder of the + // value + AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand()); + if (!Dest) continue; + + DenseMap<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest); + if (ai == AllocaLookup.end()) + continue; + + // what value were we writing? + IncomingVals[ai->second] = SI->getOperand(0); + // Record debuginfo for the store before removing it. + if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second]) { + if (!DIB) + DIB = new DIBuilder(*SI->getParent()->getParent()->getParent()); + ConvertDebugDeclareToDebugValue(DDI, SI, *DIB); + } + BB->getInstList().erase(SI); + } + } + + // 'Recurse' to our successors. + succ_iterator I = succ_begin(BB), E = succ_end(BB); + if (I == E) return; + + // Keep track of the successors so we don't visit the same successor twice + SmallPtrSet<BasicBlock*, 8> VisitedSuccs; + + // Handle the first successor without using the worklist. + VisitedSuccs.insert(*I); + Pred = BB; + BB = *I; + ++I; + + for (; I != E; ++I) + if (VisitedSuccs.insert(*I)) + Worklist.push_back(RenamePassData(*I, Pred, IncomingVals)); + + goto NextIteration; +} + +/// PromoteMemToReg - Promote the specified list of alloca instructions into +/// scalar registers, inserting PHI nodes as appropriate. This function does +/// not modify the CFG of the function at all. All allocas must be from the +/// same function. +/// +/// If AST is specified, the specified tracker is updated to reflect changes +/// made to the IR. +/// +void llvm::PromoteMemToReg(const std::vector<AllocaInst*> &Allocas, + DominatorTree &DT, AliasSetTracker *AST) { + // If there is nothing to do, bail out... + if (Allocas.empty()) return; + + PromoteMem2Reg(Allocas, DT, AST).run(); +} diff --git a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp new file mode 100644 index 0000000..fa8061c --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp @@ -0,0 +1,520 @@ +//===- SSAUpdater.cpp - Unstructured SSA Update Tool ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements the SSAUpdater class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "ssaupdater" +#include "llvm/Constants.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/TinyPtrVector.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Support/AlignOf.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" +#include "llvm/Transforms/Utils/SSAUpdaterImpl.h" + +using namespace llvm; + +typedef DenseMap<BasicBlock*, Value*> AvailableValsTy; +static AvailableValsTy &getAvailableVals(void *AV) { + return *static_cast<AvailableValsTy*>(AV); +} + +SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI) + : AV(0), ProtoType(0), ProtoName(), InsertedPHIs(NewPHI) {} + +SSAUpdater::~SSAUpdater() { + delete &getAvailableVals(AV); +} + +/// Initialize - Reset this object to get ready for a new set of SSA +/// updates with type 'Ty'. PHI nodes get a name based on 'Name'. +void SSAUpdater::Initialize(Type *Ty, StringRef Name) { + if (AV == 0) + AV = new AvailableValsTy(); + else + getAvailableVals(AV).clear(); + ProtoType = Ty; + ProtoName = Name; +} + +/// HasValueForBlock - Return true if the SSAUpdater already has a value for +/// the specified block. +bool SSAUpdater::HasValueForBlock(BasicBlock *BB) const { + return getAvailableVals(AV).count(BB); +} + +/// AddAvailableValue - Indicate that a rewritten value is available in the +/// specified block with the specified value. +void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) { + assert(ProtoType != 0 && "Need to initialize SSAUpdater"); + assert(ProtoType == V->getType() && + "All rewritten values must have the same type"); + getAvailableVals(AV)[BB] = V; +} + +/// IsEquivalentPHI - Check if PHI has the same incoming value as specified +/// in ValueMapping for each predecessor block. +static bool IsEquivalentPHI(PHINode *PHI, + DenseMap<BasicBlock*, Value*> &ValueMapping) { + unsigned PHINumValues = PHI->getNumIncomingValues(); + if (PHINumValues != ValueMapping.size()) + return false; + + // Scan the phi to see if it matches. + for (unsigned i = 0, e = PHINumValues; i != e; ++i) + if (ValueMapping[PHI->getIncomingBlock(i)] != + PHI->getIncomingValue(i)) { + return false; + } + + return true; +} + +/// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is +/// live at the end of the specified block. +Value *SSAUpdater::GetValueAtEndOfBlock(BasicBlock *BB) { + Value *Res = GetValueAtEndOfBlockInternal(BB); + return Res; +} + +/// GetValueInMiddleOfBlock - Construct SSA form, materializing a value that +/// is live in the middle of the specified block. +/// +/// GetValueInMiddleOfBlock is the same as GetValueAtEndOfBlock except in one +/// important case: if there is a definition of the rewritten value after the +/// 'use' in BB. Consider code like this: +/// +/// X1 = ... +/// SomeBB: +/// use(X) +/// X2 = ... +/// br Cond, SomeBB, OutBB +/// +/// In this case, there are two values (X1 and X2) added to the AvailableVals +/// set by the client of the rewriter, and those values are both live out of +/// their respective blocks. 
However, the use of X happens in the *middle* of +/// a block. Because of this, we need to insert a new PHI node in SomeBB to +/// merge the appropriate values, and this value isn't live out of the block. +/// +Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { + // If there is no definition of the renamed variable in this block, just use + // GetValueAtEndOfBlock to do our work. + if (!HasValueForBlock(BB)) + return GetValueAtEndOfBlock(BB); + + // Otherwise, we have the hard case. Get the live-in values for each + // predecessor. + SmallVector<std::pair<BasicBlock*, Value*>, 8> PredValues; + Value *SingularValue = 0; + + // We can get our predecessor info by walking the pred_iterator list, but it + // is relatively slow. If we already have PHI nodes in this block, walk one + // of them to get the predecessor list instead. + if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) { + for (unsigned i = 0, e = SomePhi->getNumIncomingValues(); i != e; ++i) { + BasicBlock *PredBB = SomePhi->getIncomingBlock(i); + Value *PredVal = GetValueAtEndOfBlock(PredBB); + PredValues.push_back(std::make_pair(PredBB, PredVal)); + + // Compute SingularValue. + if (i == 0) + SingularValue = PredVal; + else if (PredVal != SingularValue) + SingularValue = 0; + } + } else { + bool isFirstPred = true; + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + BasicBlock *PredBB = *PI; + Value *PredVal = GetValueAtEndOfBlock(PredBB); + PredValues.push_back(std::make_pair(PredBB, PredVal)); + + // Compute SingularValue. + if (isFirstPred) { + SingularValue = PredVal; + isFirstPred = false; + } else if (PredVal != SingularValue) + SingularValue = 0; + } + } + + // If there are no predecessors, just return undef. + if (PredValues.empty()) + return UndefValue::get(ProtoType); + + // Otherwise, if all the merged values are the same, just use it. + if (SingularValue != 0) + return SingularValue; + + // Otherwise, we do need a PHI: check to see if we already have one available + // in this block that produces the right value. + if (isa<PHINode>(BB->begin())) { + DenseMap<BasicBlock*, Value*> ValueMapping(PredValues.begin(), + PredValues.end()); + PHINode *SomePHI; + for (BasicBlock::iterator It = BB->begin(); + (SomePHI = dyn_cast<PHINode>(It)); ++It) { + if (IsEquivalentPHI(SomePHI, ValueMapping)) + return SomePHI; + } + } + + // Ok, we have no way out, insert a new one now. + PHINode *InsertedPHI = PHINode::Create(ProtoType, PredValues.size(), + ProtoName, &BB->front()); + + // Fill in all the predecessors of the PHI. + for (unsigned i = 0, e = PredValues.size(); i != e; ++i) + InsertedPHI->addIncoming(PredValues[i].second, PredValues[i].first); + + // See if the PHI node can be merged to a single value. This can happen in + // loop cases when we get a PHI of itself and one other value. + if (Value *V = SimplifyInstruction(InsertedPHI)) { + InsertedPHI->eraseFromParent(); + return V; + } + + // Set DebugLoc. + InsertedPHI->setDebugLoc(GetFirstDebugLocInBasicBlock(BB)); + + // If the client wants to know about all new instructions, tell it. + if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); + + DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n"); + return InsertedPHI; +} + +/// RewriteUse - Rewrite a use of the symbolic value. This handles PHI nodes, +/// which use their value in the corresponding predecessor. 
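+///
+/// For example (illustrative IR), if U is the incoming value %u from %pred in
+///   %p = phi i32 [ %u, %pred ]
+/// then the replacement must be the value live at the end of %pred, not a
+/// value computed in the phi's own block.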
+void SSAUpdater::RewriteUse(Use &U) { + Instruction *User = cast<Instruction>(U.getUser()); + + Value *V; + if (PHINode *UserPN = dyn_cast<PHINode>(User)) + V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U)); + else + V = GetValueInMiddleOfBlock(User->getParent()); + + U.set(V); +} + +/// RewriteUseAfterInsertions - Rewrite a use, just like RewriteUse. However, +/// this version of the method can rewrite uses in the same block as a +/// definition, because it assumes that all uses of a value are below any +/// inserted values. +void SSAUpdater::RewriteUseAfterInsertions(Use &U) { + Instruction *User = cast<Instruction>(U.getUser()); + + Value *V; + if (PHINode *UserPN = dyn_cast<PHINode>(User)) + V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U)); + else + V = GetValueAtEndOfBlock(User->getParent()); + + U.set(V); +} + +/// PHIiter - Iterator for PHI operands. This is used for the PHI_iterator +/// in the SSAUpdaterImpl template. +namespace { + class PHIiter { + private: + PHINode *PHI; + unsigned idx; + + public: + explicit PHIiter(PHINode *P) // begin iterator + : PHI(P), idx(0) {} + PHIiter(PHINode *P, bool) // end iterator + : PHI(P), idx(PHI->getNumIncomingValues()) {} + + PHIiter &operator++() { ++idx; return *this; } + bool operator==(const PHIiter& x) const { return idx == x.idx; } + bool operator!=(const PHIiter& x) const { return !operator==(x); } + Value *getIncomingValue() { return PHI->getIncomingValue(idx); } + BasicBlock *getIncomingBlock() { return PHI->getIncomingBlock(idx); } + }; +} + +/// SSAUpdaterTraits<SSAUpdater> - Traits for the SSAUpdaterImpl template, +/// specialized for SSAUpdater. +namespace llvm { +template<> +class SSAUpdaterTraits<SSAUpdater> { +public: + typedef BasicBlock BlkT; + typedef Value *ValT; + typedef PHINode PhiT; + + typedef succ_iterator BlkSucc_iterator; + static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return succ_begin(BB); } + static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return succ_end(BB); } + + typedef PHIiter PHI_iterator; + static inline PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); } + static inline PHI_iterator PHI_end(PhiT *PHI) { + return PHI_iterator(PHI, true); + } + + /// FindPredecessorBlocks - Put the predecessors of Info->BB into the Preds + /// vector, set Info->NumPreds, and allocate space in Info->Preds. + static void FindPredecessorBlocks(BasicBlock *BB, + SmallVectorImpl<BasicBlock*> *Preds) { + // We can get our predecessor info by walking the pred_iterator list, + // but it is relatively slow. If we already have PHI nodes in this + // block, walk one of them to get the predecessor list instead. + if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) { + for (unsigned PI = 0, E = SomePhi->getNumIncomingValues(); PI != E; ++PI) + Preds->push_back(SomePhi->getIncomingBlock(PI)); + } else { + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + Preds->push_back(*PI); + } + } + + /// GetUndefVal - Get an undefined value of the same type as the value + /// being handled. + static Value *GetUndefVal(BasicBlock *BB, SSAUpdater *Updater) { + return UndefValue::get(Updater->ProtoType); + } + + /// CreateEmptyPHI - Create a new PHI instruction in the specified block. + /// Reserve space for the operands but do not fill them in yet. 
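+  /// (An operand-less PHI is how SSAUpdaterImpl marks a value whose incoming
+  /// entries are still pending; ValueIsNewPHI below tests for exactly this.)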
+  static Value *CreateEmptyPHI(BasicBlock *BB, unsigned NumPreds,
+                               SSAUpdater *Updater) {
+    PHINode *PHI = PHINode::Create(Updater->ProtoType, NumPreds,
+                                   Updater->ProtoName, &BB->front());
+    return PHI;
+  }
+
+  /// AddPHIOperand - Add the specified value as an operand of the PHI for
+  /// the specified predecessor block.
+  static void AddPHIOperand(PHINode *PHI, Value *Val, BasicBlock *Pred) {
+    PHI->addIncoming(Val, Pred);
+  }
+
+  /// InstrIsPHI - Check if an instruction is a PHI.
+  ///
+  static PHINode *InstrIsPHI(Instruction *I) {
+    return dyn_cast<PHINode>(I);
+  }
+
+  /// ValueIsPHI - Check if a value is a PHI.
+  ///
+  static PHINode *ValueIsPHI(Value *Val, SSAUpdater *Updater) {
+    return dyn_cast<PHINode>(Val);
+  }
+
+  /// ValueIsNewPHI - Like ValueIsPHI but also check if the PHI has no source
+  /// operands, i.e., it was just added.
+  static PHINode *ValueIsNewPHI(Value *Val, SSAUpdater *Updater) {
+    PHINode *PHI = ValueIsPHI(Val, Updater);
+    if (PHI && PHI->getNumIncomingValues() == 0)
+      return PHI;
+    return 0;
+  }
+
+  /// GetPHIValue - For the specified PHI instruction, return the value
+  /// that it defines.
+  static Value *GetPHIValue(PHINode *PHI) {
+    return PHI;
+  }
+};
+
+} // End llvm namespace
+
+/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry
+/// for the specified BB and if so, return it.  If not, construct SSA form by
+/// first calculating the required placement of PHIs and then inserting new
+/// PHIs where needed.
+Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
+  AvailableValsTy &AvailableVals = getAvailableVals(AV);
+  if (Value *V = AvailableVals[BB])
+    return V;
+
+  SSAUpdaterImpl<SSAUpdater> Impl(this, &AvailableVals, InsertedPHIs);
+  return Impl.GetValue(BB);
+}
+
+//===----------------------------------------------------------------------===//
+// LoadAndStorePromoter Implementation
+//===----------------------------------------------------------------------===//
+
+LoadAndStorePromoter::
+LoadAndStorePromoter(const SmallVectorImpl<Instruction*> &Insts,
+                     SSAUpdater &S, StringRef BaseName) : SSA(S) {
+  if (Insts.empty()) return;
+
+  Value *SomeVal;
+  if (LoadInst *LI = dyn_cast<LoadInst>(Insts[0]))
+    SomeVal = LI;
+  else
+    SomeVal = cast<StoreInst>(Insts[0])->getOperand(0);
+
+  if (BaseName.empty())
+    BaseName = SomeVal->getName();
+  SSA.Initialize(SomeVal->getType(), BaseName);
+}
+
+
+void LoadAndStorePromoter::
+run(const SmallVectorImpl<Instruction*> &Insts) const {
+
+  // First step: bucket up uses of the alloca by the block they occur in.
+  // This is important because we have to handle multiple defs/uses in a block
+  // ourselves: SSAUpdater is purely for cross-block references.
+  DenseMap<BasicBlock*, TinyPtrVector<Instruction*> > UsesByBlock;
+
+  for (unsigned i = 0, e = Insts.size(); i != e; ++i) {
+    Instruction *User = Insts[i];
+    UsesByBlock[User->getParent()].push_back(User);
+  }
+
+  // Okay, now we can iterate over all the blocks in the function with uses,
+  // processing them.  Keep track of which loads are loading a live-in value.
+  // Walk the uses in the use-list order to be deterministic.
+  SmallVector<LoadInst*, 32> LiveInLoads;
+  DenseMap<Value*, Value*> ReplacedLoads;
+
+  for (unsigned i = 0, e = Insts.size(); i != e; ++i) {
+    Instruction *User = Insts[i];
+    BasicBlock *BB = User->getParent();
+    TinyPtrVector<Instruction*> &BlockUses = UsesByBlock[BB];
+
+    // If this block has already been processed, ignore this repeat use.
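+    // (Each block's use vector is cleared below once the block has been
+    // handled, so an empty vector here marks a second or later use from the
+    // same block.)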
+    if (BlockUses.empty()) continue;
+
+    // Okay, this is the first use in the block.  If this block just has a
+    // single user in it, we can rewrite it trivially.
+    if (BlockUses.size() == 1) {
+      // If it is a store, it is a trivial def of the value in the block.
+      if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+        updateDebugInfo(SI);
+        SSA.AddAvailableValue(BB, SI->getOperand(0));
+      } else
+        // Otherwise it is a load, queue it to rewrite as a live-in load.
+        LiveInLoads.push_back(cast<LoadInst>(User));
+      BlockUses.clear();
+      continue;
+    }
+
+    // Otherwise, check to see if this block is all loads.
+    bool HasStore = false;
+    for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) {
+      if (isa<StoreInst>(BlockUses[i])) {
+        HasStore = true;
+        break;
+      }
+    }
+
+    // If so, we can queue them all as live in loads.  We don't have an
+    // efficient way to tell which one is first in the block and don't want to
+    // scan large blocks, so just add all loads as live ins.
+    if (!HasStore) {
+      for (unsigned i = 0, e = BlockUses.size(); i != e; ++i)
+        LiveInLoads.push_back(cast<LoadInst>(BlockUses[i]));
+      BlockUses.clear();
+      continue;
+    }
+
+    // Otherwise, we have mixed loads and stores (or just a bunch of stores).
+    // Since SSAUpdater is purely for cross-block values, we need to determine
+    // the order of these instructions in the block.  If the first use in the
+    // block is a load, then it uses the live in value.  The last store defines
+    // the live out value.  We handle this by doing a linear scan of the block.
+    Value *StoredValue = 0;
+    for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
+      if (LoadInst *L = dyn_cast<LoadInst>(II)) {
+        // If this is a load from an unrelated pointer, ignore it.
+        if (!isInstInList(L, Insts)) continue;
+
+        // If we haven't seen a store yet, this is a live in use, otherwise
+        // use the stored value.
+        if (StoredValue) {
+          replaceLoadWithValue(L, StoredValue);
+          L->replaceAllUsesWith(StoredValue);
+          ReplacedLoads[L] = StoredValue;
+        } else {
+          LiveInLoads.push_back(L);
+        }
+        continue;
+      }
+
+      if (StoreInst *SI = dyn_cast<StoreInst>(II)) {
+        // If this is a store to an unrelated pointer, ignore it.
+        if (!isInstInList(SI, Insts)) continue;
+        updateDebugInfo(SI);
+
+        // Remember that this is the active value in the block.
+        StoredValue = SI->getOperand(0);
+      }
+    }
+
+    // The last stored value that happened is the live-out for the block.
+    assert(StoredValue && "Already checked that there is a store in block");
+    SSA.AddAvailableValue(BB, StoredValue);
+    BlockUses.clear();
+  }
+
+  // Okay, now we rewrite all loads that use live-in values in the loop,
+  // inserting PHI nodes as necessary.
+  for (unsigned i = 0, e = LiveInLoads.size(); i != e; ++i) {
+    LoadInst *ALoad = LiveInLoads[i];
+    Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent());
+    replaceLoadWithValue(ALoad, NewVal);
+
+    // Avoid assertions in unreachable code.
+    if (NewVal == ALoad) NewVal = UndefValue::get(NewVal->getType());
+    ALoad->replaceAllUsesWith(NewVal);
+    ReplacedLoads[ALoad] = NewVal;
+  }
+
+  // Allow the client to do stuff before we start nuking things.
+  doExtraRewritesBeforeFinalDeletion();
+
+  // Now that everything is rewritten, delete the old instructions from the
+  // function.  They should all be dead now.
+  for (unsigned i = 0, e = Insts.size(); i != e; ++i) {
+    Instruction *User = Insts[i];
+
+    // If this is a load that still has uses, then the load must have been
+    // added as a live value in the SSAUpdater data structure for a block (e.g.
because
+    // the loaded value was stored later).  In this case, we need to
+    // recursively propagate the updates until we get to the real value.
+    if (!User->use_empty()) {
+      Value *NewVal = ReplacedLoads[User];
+      assert(NewVal && "not a replaced load?");
+
+      // Propagate down to the ultimate replacee.  The intermediate loads
+      // could theoretically already have been deleted, so we don't want to
+      // dereference the Value*'s.
+      DenseMap<Value*, Value*>::iterator RLI = ReplacedLoads.find(NewVal);
+      while (RLI != ReplacedLoads.end()) {
+        NewVal = RLI->second;
+        RLI = ReplacedLoads.find(NewVal);
+      }
+
+      replaceLoadWithValue(cast<LoadInst>(User), NewVal);
+      User->replaceAllUsesWith(NewVal);
+    }
+
+    instructionDeleted(User);
+    User->eraseFromParent();
+  }
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
new file mode 100644
index 0000000..b8c3ab4
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -0,0 +1,2912 @@
+//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License.  See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Peephole optimize the CFG.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "simplifycfg"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Type.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/NoFolder.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <set>
+#include <map>
+using namespace llvm;
+
+static cl::opt<unsigned>
+PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(1),
+  cl::desc("Control the amount of phi node folding to perform (default = 1)"));
+
+static cl::opt<bool>
+DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false),
+       cl::desc("Duplicate return instructions into unconditional branches"));
+
+STATISTIC(NumSpeculations, "Number of speculatively executed instructions");
+
+namespace {
+class SimplifyCFGOpt {
+  const TargetData *const TD;
+
+  Value *isValueEqualityComparison(TerminatorInst *TI);
+  BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI,
+    std::vector<std::pair<ConstantInt*, BasicBlock*> > &Cases);
+  bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
+                                                     BasicBlock *Pred,
+                                                     IRBuilder<> &Builder);
+  bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
+                                           IRBuilder<> &Builder);
+
+  bool SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
+  bool SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder);
+  bool SimplifyUnwind(UnwindInst *UI, IRBuilder<> &Builder);
+  bool SimplifyUnreachable(UnreachableInst *UI);
+  bool
SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder); + bool SimplifyIndirectBr(IndirectBrInst *IBI); + bool SimplifyUncondBranch(BranchInst *BI, IRBuilder <> &Builder); + bool SimplifyCondBranch(BranchInst *BI, IRBuilder <>&Builder); + +public: + explicit SimplifyCFGOpt(const TargetData *td) : TD(td) {} + bool run(BasicBlock *BB); +}; +} + +/// SafeToMergeTerminators - Return true if it is safe to merge these two +/// terminator instructions together. +/// +static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) { + if (SI1 == SI2) return false; // Can't merge with self! + + // It is not safe to merge these two switch instructions if they have a common + // successor, and if that successor has a PHI node, and if *that* PHI node has + // conflicting incoming values from the two switch blocks. + BasicBlock *SI1BB = SI1->getParent(); + BasicBlock *SI2BB = SI2->getParent(); + SmallPtrSet<BasicBlock*, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB)); + + for (succ_iterator I = succ_begin(SI2BB), E = succ_end(SI2BB); I != E; ++I) + if (SI1Succs.count(*I)) + for (BasicBlock::iterator BBI = (*I)->begin(); + isa<PHINode>(BBI); ++BBI) { + PHINode *PN = cast<PHINode>(BBI); + if (PN->getIncomingValueForBlock(SI1BB) != + PN->getIncomingValueForBlock(SI2BB)) + return false; + } + + return true; +} + +/// AddPredecessorToBlock - Update PHI nodes in Succ to indicate that there will +/// now be entries in it from the 'NewPred' block. The values that will be +/// flowing into the PHI nodes will be the same as those coming in from +/// ExistPred, an existing predecessor of Succ. +static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, + BasicBlock *ExistPred) { + if (!isa<PHINode>(Succ->begin())) return; // Quick exit if nothing to do + + PHINode *PN; + for (BasicBlock::iterator I = Succ->begin(); + (PN = dyn_cast<PHINode>(I)); ++I) + PN->addIncoming(PN->getIncomingValueForBlock(ExistPred), NewPred); +} + + +/// GetIfCondition - Given a basic block (BB) with two predecessors (and at +/// least one PHI node in it), check to see if the merge at this block is due +/// to an "if condition". If so, return the boolean condition that determines +/// which entry into BB will be taken. Also, return by references the block +/// that will be entered from if the condition is true, and the block that will +/// be entered if the condition is false. +/// +/// This does no checking to see if the true/false blocks have large or unsavory +/// instructions in them. +static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, + BasicBlock *&IfFalse) { + PHINode *SomePHI = cast<PHINode>(BB->begin()); + assert(SomePHI->getNumIncomingValues() == 2 && + "Function can only handle blocks with 2 predecessors!"); + BasicBlock *Pred1 = SomePHI->getIncomingBlock(0); + BasicBlock *Pred2 = SomePHI->getIncomingBlock(1); + + // We can only handle branches. Other control flow will be lowered to + // branches if possible anyway. + BranchInst *Pred1Br = dyn_cast<BranchInst>(Pred1->getTerminator()); + BranchInst *Pred2Br = dyn_cast<BranchInst>(Pred2->getTerminator()); + if (Pred1Br == 0 || Pred2Br == 0) + return 0; + + // Eliminate code duplication by ensuring that Pred1Br is conditional if + // either are. + if (Pred2Br->isConditional()) { + // If both branches are conditional, we don't have an "if statement". In + // reality, we could transform this case, but since the condition will be + // required anyway, we stand no chance of eliminating it, so the xform is + // probably not profitable. 
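+  //
+  // (The two shapes accepted here, sketched: a "triangle", where one
+  // predecessor conditionally branches either to BB or to the other
+  // predecessor, and a "diamond", where both predecessors are unconditional
+  // branches out of a common conditional block.)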
+ if (Pred1Br->isConditional()) + return 0; + + std::swap(Pred1, Pred2); + std::swap(Pred1Br, Pred2Br); + } + + if (Pred1Br->isConditional()) { + // The only thing we have to watch out for here is to make sure that Pred2 + // doesn't have incoming edges from other blocks. If it does, the condition + // doesn't dominate BB. + if (Pred2->getSinglePredecessor() == 0) + return 0; + + // If we found a conditional branch predecessor, make sure that it branches + // to BB and Pred2Br. If it doesn't, this isn't an "if statement". + if (Pred1Br->getSuccessor(0) == BB && + Pred1Br->getSuccessor(1) == Pred2) { + IfTrue = Pred1; + IfFalse = Pred2; + } else if (Pred1Br->getSuccessor(0) == Pred2 && + Pred1Br->getSuccessor(1) == BB) { + IfTrue = Pred2; + IfFalse = Pred1; + } else { + // We know that one arm of the conditional goes to BB, so the other must + // go somewhere unrelated, and this must not be an "if statement". + return 0; + } + + return Pred1Br->getCondition(); + } + + // Ok, if we got here, both predecessors end with an unconditional branch to + // BB. Don't panic! If both blocks only have a single (identical) + // predecessor, and THAT is a conditional branch, then we're all ok! + BasicBlock *CommonPred = Pred1->getSinglePredecessor(); + if (CommonPred == 0 || CommonPred != Pred2->getSinglePredecessor()) + return 0; + + // Otherwise, if this is a conditional branch, then we can use it! + BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator()); + if (BI == 0) return 0; + + assert(BI->isConditional() && "Two successors but not conditional?"); + if (BI->getSuccessor(0) == Pred1) { + IfTrue = Pred1; + IfFalse = Pred2; + } else { + IfTrue = Pred2; + IfFalse = Pred1; + } + return BI->getCondition(); +} + +/// DominatesMergePoint - If we have a merge point of an "if condition" as +/// accepted above, return true if the specified value dominates the block. We +/// don't handle the true generality of domination here, just a special case +/// which works well enough for us. +/// +/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to +/// see if V (which must be an instruction) and its recursive operands +/// that do not dominate BB have a combined cost lower than CostRemaining and +/// are non-trapping. If both are true, the instruction is inserted into the +/// set and true is returned. +/// +/// The cost for most non-trapping instructions is defined as 1 except for +/// Select whose cost is 2. +/// +/// After this function returns, CostRemaining is decreased by the cost of +/// V plus its non-dominating operands. If that cost is greater than +/// CostRemaining, false is returned and CostRemaining is undefined. +static bool DominatesMergePoint(Value *V, BasicBlock *BB, + SmallPtrSet<Instruction*, 4> *AggressiveInsts, + unsigned &CostRemaining) { + Instruction *I = dyn_cast<Instruction>(V); + if (!I) { + // Non-instructions all dominate instructions, but not all constantexprs + // can be executed unconditionally. + if (ConstantExpr *C = dyn_cast<ConstantExpr>(V)) + if (C->canTrap()) + return false; + return true; + } + BasicBlock *PBB = I->getParent(); + + // We don't want to allow weird loops that might have the "if condition" in + // the bottom of this block. + if (PBB == BB) return false; + + // If this instruction is defined in a block that contains an unconditional + // branch to BB, then it must be in the 'conditional' part of the "if + // statement". If not, it definitely dominates the region. 
+ BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator()); + if (BI == 0 || BI->isConditional() || BI->getSuccessor(0) != BB) + return true; + + // If we aren't allowing aggressive promotion anymore, then don't consider + // instructions in the 'if region'. + if (AggressiveInsts == 0) return false; + + // If we have seen this instruction before, don't count it again. + if (AggressiveInsts->count(I)) return true; + + // Okay, it looks like the instruction IS in the "condition". Check to + // see if it's a cheap instruction to unconditionally compute, and if it + // only uses stuff defined outside of the condition. If so, hoist it out. + if (!I->isSafeToSpeculativelyExecute()) + return false; + + unsigned Cost = 0; + + switch (I->getOpcode()) { + default: return false; // Cannot hoist this out safely. + case Instruction::Load: + // We have to check to make sure there are no instructions before the + // load in its basic block, as we are going to hoist the load out to its + // predecessor. + if (PBB->getFirstNonPHIOrDbg() != I) + return false; + Cost = 1; + break; + case Instruction::GetElementPtr: + // GEPs are cheap if all indices are constant. + if (!cast<GetElementPtrInst>(I)->hasAllConstantIndices()) + return false; + Cost = 1; + break; + case Instruction::Add: + case Instruction::Sub: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::ICmp: + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + Cost = 1; + break; // These are all cheap and non-trapping instructions. + + case Instruction::Select: + Cost = 2; + break; + } + + if (Cost > CostRemaining) + return false; + + CostRemaining -= Cost; + + // Okay, we can only really hoist these out if their operands do + // not take us over the cost threshold. + for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) + if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining)) + return false; + // Okay, it's safe to do this! Remember this instruction. + AggressiveInsts->insert(I); + return true; +} + +/// GetConstantInt - Extract ConstantInt from value, looking through IntToPtr +/// and PointerNullValue. Return NULL if value is not a constant int. +static ConstantInt *GetConstantInt(Value *V, const TargetData *TD) { + // Normal constant int. + ConstantInt *CI = dyn_cast<ConstantInt>(V); + if (CI || !TD || !isa<Constant>(V) || !V->getType()->isPointerTy()) + return CI; + + // This is some kind of pointer constant. Turn it into a pointer-sized + // ConstantInt if possible. + IntegerType *PtrTy = TD->getIntPtrType(V->getContext()); + + // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*). + if (isa<ConstantPointerNull>(V)) + return ConstantInt::get(PtrTy, 0); + + // IntToPtr const int. + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) + if (CE->getOpcode() == Instruction::IntToPtr) + if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) { + // The constant is very likely to have the right type already. + if (CI->getType() == PtrTy) + return CI; + else + return cast<ConstantInt> + (ConstantExpr::getIntegerCast(CI, PtrTy, /*isSigned=*/false)); + } + return 0; +} + +/// GatherConstantCompares - Given a potentially 'or'd or 'and'd together +/// collection of icmp eq/ne instructions that compare a value against a +/// constant, return the value being compared, and stick the constant into the +/// Values vector. 
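+///
+/// For example (illustrative), "x == 1 || x == 4 || x == 9" fills Vals with
+/// {1, 4, 9} and returns x, which lets the caller build a switch on x.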
+static Value *
+GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
+                       const TargetData *TD, bool isEQ, unsigned &UsedICmps) {
+  Instruction *I = dyn_cast<Instruction>(V);
+  if (I == 0) return 0;
+
+  // If this is an icmp against a constant, handle this as one of the cases.
+  if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
+    if (ConstantInt *C = GetConstantInt(I->getOperand(1), TD)) {
+      if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ:ICmpInst::ICMP_NE)) {
+        UsedICmps++;
+        Vals.push_back(C);
+        return I->getOperand(0);
+      }
+
+      // If we have "x ult 3" comparison, for example, then we can add 0,1,2 to
+      // the set.
+      ConstantRange Span =
+        ConstantRange::makeICmpRegion(ICI->getPredicate(), C->getValue());
+
+      // If this is an and/!= check then we want to optimize "x ugt 2" into
+      // x != 0 && x != 1.
+      if (!isEQ)
+        Span = Span.inverse();
+
+      // If there are a ton of values, we don't want to make a ginormous switch.
+      if (Span.getSetSize().ugt(8) || Span.isEmptySet() ||
+          // We don't handle wrapped sets yet.
+          Span.isWrappedSet())
+        return 0;
+
+      for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
+        Vals.push_back(ConstantInt::get(V->getContext(), Tmp));
+      UsedICmps++;
+      return I->getOperand(0);
+    }
+    return 0;
+  }
+
+  // Otherwise, we can only handle an | or &, depending on isEQ.
+  if (I->getOpcode() != (isEQ ? Instruction::Or : Instruction::And))
+    return 0;
+
+  unsigned NumValsBeforeLHS = Vals.size();
+  unsigned UsedICmpsBeforeLHS = UsedICmps;
+  if (Value *LHS = GatherConstantCompares(I->getOperand(0), Vals, Extra, TD,
+                                          isEQ, UsedICmps)) {
+    unsigned NumVals = Vals.size();
+    unsigned UsedICmpsBeforeRHS = UsedICmps;
+    if (Value *RHS = GatherConstantCompares(I->getOperand(1), Vals, Extra, TD,
+                                            isEQ, UsedICmps)) {
+      if (LHS == RHS)
+        return LHS;
+      Vals.resize(NumVals);
+      UsedICmps = UsedICmpsBeforeRHS;
+    }
+
+    // If the RHS of the or/and can't be folded in and we haven't used "Extra"
+    // yet, set it and return success.
+    if (Extra == 0 || Extra == I->getOperand(1)) {
+      Extra = I->getOperand(1);
+      return LHS;
+    }
+
+    Vals.resize(NumValsBeforeLHS);
+    UsedICmps = UsedICmpsBeforeLHS;
+    return 0;
+  }
+
+  // If the LHS can't be folded in, but Extra is available and RHS can, try to
+  // use LHS as Extra.
+  if (Extra == 0 || Extra == I->getOperand(0)) {
+    Value *OldExtra = Extra;
+    Extra = I->getOperand(0);
+    if (Value *RHS = GatherConstantCompares(I->getOperand(1), Vals, Extra, TD,
+                                            isEQ, UsedICmps))
+      return RHS;
+    assert(Vals.size() == NumValsBeforeLHS);
+    Extra = OldExtra;
+  }
+
+  return 0;
+}
+
+static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) {
+  Instruction* Cond = 0;
+  if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+    Cond = dyn_cast<Instruction>(SI->getCondition());
+  } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+    if (BI->isConditional())
+      Cond = dyn_cast<Instruction>(BI->getCondition());
+  } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
+    Cond = dyn_cast<Instruction>(IBI->getAddress());
+  }
+
+  TI->eraseFromParent();
+  if (Cond) RecursivelyDeleteTriviallyDeadInstructions(Cond);
+}
+
+/// isValueEqualityComparison - If the specified terminator checks to see if a
+/// value is equal to a constant integer value, return the value being
+/// compared; otherwise return null.
+Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) {
+  Value *CV = 0;
+  if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+    // Do not permit merging of large switch instructions into their
+    // predecessors unless there is only one predecessor.
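+    // (E.g. a switch with 4 successors and 32 predecessors scores exactly
+    // 4 * 32 == 128 and is still accepted; anything larger is rejected.)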
+ if (SI->getNumSuccessors()*std::distance(pred_begin(SI->getParent()), + pred_end(SI->getParent())) <= 128) + CV = SI->getCondition(); + } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) + if (BI->isConditional() && BI->getCondition()->hasOneUse()) + if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) + if ((ICI->getPredicate() == ICmpInst::ICMP_EQ || + ICI->getPredicate() == ICmpInst::ICMP_NE) && + GetConstantInt(ICI->getOperand(1), TD)) + CV = ICI->getOperand(0); + + // Unwrap any lossless ptrtoint cast. + if (TD && CV && CV->getType() == TD->getIntPtrType(CV->getContext())) + if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) + CV = PTII->getOperand(0); + return CV; +} + +/// GetValueEqualityComparisonCases - Given a value comparison instruction, +/// decode all of the 'cases' that it represents and return the 'default' block. +BasicBlock *SimplifyCFGOpt:: +GetValueEqualityComparisonCases(TerminatorInst *TI, + std::vector<std::pair<ConstantInt*, + BasicBlock*> > &Cases) { + if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { + Cases.reserve(SI->getNumCases()); + for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) + Cases.push_back(std::make_pair(SI->getCaseValue(i), SI->getSuccessor(i))); + return SI->getDefaultDest(); + } + + BranchInst *BI = cast<BranchInst>(TI); + ICmpInst *ICI = cast<ICmpInst>(BI->getCondition()); + Cases.push_back(std::make_pair(GetConstantInt(ICI->getOperand(1), TD), + BI->getSuccessor(ICI->getPredicate() == + ICmpInst::ICMP_NE))); + return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ); +} + + +/// EliminateBlockCases - Given a vector of bb/value pairs, remove any entries +/// in the list that match the specified block. +static void EliminateBlockCases(BasicBlock *BB, + std::vector<std::pair<ConstantInt*, BasicBlock*> > &Cases) { + for (unsigned i = 0, e = Cases.size(); i != e; ++i) + if (Cases[i].second == BB) { + Cases.erase(Cases.begin()+i); + --i; --e; + } +} + +/// ValuesOverlap - Return true if there are any keys in C1 that exist in C2 as +/// well. +static bool +ValuesOverlap(std::vector<std::pair<ConstantInt*, BasicBlock*> > &C1, + std::vector<std::pair<ConstantInt*, BasicBlock*> > &C2) { + std::vector<std::pair<ConstantInt*, BasicBlock*> > *V1 = &C1, *V2 = &C2; + + // Make V1 be smaller than V2. + if (V1->size() > V2->size()) + std::swap(V1, V2); + + if (V1->size() == 0) return false; + if (V1->size() == 1) { + // Just scan V2. + ConstantInt *TheVal = (*V1)[0].first; + for (unsigned i = 0, e = V2->size(); i != e; ++i) + if (TheVal == (*V2)[i].first) + return true; + } + + // Otherwise, just sort both lists and compare element by element. + array_pod_sort(V1->begin(), V1->end()); + array_pod_sort(V2->begin(), V2->end()); + unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size(); + while (i1 != e1 && i2 != e2) { + if ((*V1)[i1].first == (*V2)[i2].first) + return true; + if ((*V1)[i1].first < (*V2)[i2].first) + ++i1; + else + ++i2; + } + return false; +} + +/// SimplifyEqualityComparisonWithOnlyPredecessor - If TI is known to be a +/// terminator instruction and its block is known to only have a single +/// predecessor block, check to see if that predecessor is also a value +/// comparison with the same value, and if that comparison determines the +/// outcome of this comparison. If so, simplify TI. This does a very limited +/// form of jump threading. 
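+///
+/// For instance, if the sole predecessor switched on the same value and this
+/// block is reached only as its case for 7, then a "x == 7" branch in this
+/// block is statically known to be taken.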
+bool SimplifyCFGOpt:: +SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, + BasicBlock *Pred, + IRBuilder<> &Builder) { + Value *PredVal = isValueEqualityComparison(Pred->getTerminator()); + if (!PredVal) return false; // Not a value comparison in predecessor. + + Value *ThisVal = isValueEqualityComparison(TI); + assert(ThisVal && "This isn't a value comparison!!"); + if (ThisVal != PredVal) return false; // Different predicates. + + // Find out information about when control will move from Pred to TI's block. + std::vector<std::pair<ConstantInt*, BasicBlock*> > PredCases; + BasicBlock *PredDef = GetValueEqualityComparisonCases(Pred->getTerminator(), + PredCases); + EliminateBlockCases(PredDef, PredCases); // Remove default from cases. + + // Find information about how control leaves this block. + std::vector<std::pair<ConstantInt*, BasicBlock*> > ThisCases; + BasicBlock *ThisDef = GetValueEqualityComparisonCases(TI, ThisCases); + EliminateBlockCases(ThisDef, ThisCases); // Remove default from cases. + + // If TI's block is the default block from Pred's comparison, potentially + // simplify TI based on this knowledge. + if (PredDef == TI->getParent()) { + // If we are here, we know that the value is none of those cases listed in + // PredCases. If there are any cases in ThisCases that are in PredCases, we + // can simplify TI. + if (!ValuesOverlap(PredCases, ThisCases)) + return false; + + if (isa<BranchInst>(TI)) { + // Okay, one of the successors of this condbr is dead. Convert it to a + // uncond br. + assert(ThisCases.size() == 1 && "Branch can only have one case!"); + // Insert the new branch. + Instruction *NI = Builder.CreateBr(ThisDef); + (void) NI; + + // Remove PHI node entries for the dead edge. + ThisCases[0].second->removePredecessor(TI->getParent()); + + DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() + << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n"); + + EraseTerminatorInstAndDCECond(TI); + return true; + } + + SwitchInst *SI = cast<SwitchInst>(TI); + // Okay, TI has cases that are statically dead, prune them away. + SmallPtrSet<Constant*, 16> DeadCases; + for (unsigned i = 0, e = PredCases.size(); i != e; ++i) + DeadCases.insert(PredCases[i].first); + + DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() + << "Through successor TI: " << *TI); + + for (unsigned i = SI->getNumCases()-1; i != 0; --i) + if (DeadCases.count(SI->getCaseValue(i))) { + SI->getSuccessor(i)->removePredecessor(TI->getParent()); + SI->removeCase(i); + } + + DEBUG(dbgs() << "Leaving: " << *TI << "\n"); + return true; + } + + // Otherwise, TI's block must correspond to some matched value. Find out + // which value (or set of values) this is. + ConstantInt *TIV = 0; + BasicBlock *TIBB = TI->getParent(); + for (unsigned i = 0, e = PredCases.size(); i != e; ++i) + if (PredCases[i].second == TIBB) { + if (TIV != 0) + return false; // Cannot handle multiple values coming to this block. + TIV = PredCases[i].first; + } + assert(TIV && "No edge from pred to succ?"); + + // Okay, we found the one constant that our value can be if we get into TI's + // BB. Find out which successor will unconditionally be branched to. + BasicBlock *TheRealDest = 0; + for (unsigned i = 0, e = ThisCases.size(); i != e; ++i) + if (ThisCases[i].first == TIV) { + TheRealDest = ThisCases[i].second; + break; + } + + // If not handled by any explicit cases, it is handled by the default case. 
+ if (TheRealDest == 0) TheRealDest = ThisDef;
+
+ // Remove PHI node entries for dead edges.
+ BasicBlock *CheckEdge = TheRealDest;
+ for (succ_iterator SI = succ_begin(TIBB), e = succ_end(TIBB); SI != e; ++SI)
+ if (*SI != CheckEdge)
+ (*SI)->removePredecessor(TIBB);
+ else
+ CheckEdge = 0;
+
+ // Insert the new branch.
+ Instruction *NI = Builder.CreateBr(TheRealDest);
+ (void) NI;
+
+ DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
+ << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n");
+
+ EraseTerminatorInstAndDCECond(TI);
+ return true;
+}
+
+namespace {
+ /// ConstantIntOrdering - This class implements a stable ordering of constant
+ /// integers that does not depend on their address. This is important for
+ /// applications that sort ConstantInt's to ensure uniqueness.
+ struct ConstantIntOrdering {
+ bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
+ return LHS->getValue().ult(RHS->getValue());
+ }
+ };
+}
+
+// ConstantIntSortPredicate - qsort-style comparator; sorts ConstantInts in
+// ascending order of their value.
+static int ConstantIntSortPredicate(const void *P1, const void *P2) {
+ const ConstantInt *LHS = *(const ConstantInt**)P1;
+ const ConstantInt *RHS = *(const ConstantInt**)P2;
+ if (LHS->getValue().ult(RHS->getValue()))
+ return -1;
+ if (LHS->getValue() == RHS->getValue())
+ return 0;
+ return 1;
+}
+
+/// FoldValueComparisonIntoPredecessors - The specified terminator is a value
+/// equality comparison instruction (either a switch or a branch on "X == c").
+/// See if any of the predecessors of the terminator block are value comparisons
+/// on the same value. If so, and if safe to do so, fold them together.
+bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
+ IRBuilder<> &Builder) {
+ BasicBlock *BB = TI->getParent();
+ Value *CV = isValueEqualityComparison(TI); // CondVal
+ assert(CV && "Not a comparison?");
+ bool Changed = false;
+
+ SmallVector<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB));
+ while (!Preds.empty()) {
+ BasicBlock *Pred = Preds.pop_back_val();
+
+ // See if the predecessor is a comparison with the same value.
+ TerminatorInst *PTI = Pred->getTerminator();
+ Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
+
+ if (PCV == CV && SafeToMergeTerminators(TI, PTI)) {
+ // Figure out which 'cases' to copy from TI to PTI.
+ std::vector<std::pair<ConstantInt*, BasicBlock*> > BBCases;
+ BasicBlock *BBDefault = GetValueEqualityComparisonCases(TI, BBCases);
+
+ std::vector<std::pair<ConstantInt*, BasicBlock*> > PredCases;
+ BasicBlock *PredDefault = GetValueEqualityComparisonCases(PTI, PredCases);
+
+ // Based on whether the default edge from PTI goes to BB or not, fill in
+ // PredCases and PredDefault with the new switch cases we would like to
+ // build.
+ SmallVector<BasicBlock*, 8> NewSuccessors;
+
+ if (PredDefault == BB) {
+ // If this is the default destination from PTI, only the edges in TI
+ // that don't occur in PTI, or that branch to BB will be activated.
+ std::set<ConstantInt*, ConstantIntOrdering> PTIHandled;
+ for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+ if (PredCases[i].second != BB)
+ PTIHandled.insert(PredCases[i].first);
+ else {
+ // The default destination is BB, we don't need explicit targets.
+ std::swap(PredCases[i], PredCases.back());
+ PredCases.pop_back();
+ --i; --e;
+ }
+
+ // Reconstruct the new switch statement we will be building.
+ if (PredDefault != BBDefault) {
+ PredDefault->removePredecessor(Pred);
+ PredDefault = BBDefault;
+ NewSuccessors.push_back(BBDefault);
+ }
+ for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
+ if (!PTIHandled.count(BBCases[i].first) &&
+ BBCases[i].second != BBDefault) {
+ PredCases.push_back(BBCases[i]);
+ NewSuccessors.push_back(BBCases[i].second);
+ }
+
+ } else {
+ // If this is not the default destination from PTI, only the edges
+ // in TI that occur in PTI with a destination of BB will be
+ // activated.
+ std::set<ConstantInt*, ConstantIntOrdering> PTIHandled;
+ for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+ if (PredCases[i].second == BB) {
+ PTIHandled.insert(PredCases[i].first);
+ std::swap(PredCases[i], PredCases.back());
+ PredCases.pop_back();
+ --i; --e;
+ }
+
+ // Okay, now we know which constants were sent to BB from the
+ // predecessor. Figure out where they will all go now.
+ for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
+ if (PTIHandled.count(BBCases[i].first)) {
+ // If this is one we are capable of getting...
+ PredCases.push_back(BBCases[i]);
+ NewSuccessors.push_back(BBCases[i].second);
+ PTIHandled.erase(BBCases[i].first); // This constant is taken care of
+ }
+
+ // If there are any constants vectored to BB that TI doesn't handle,
+ // they must go to the default destination of TI.
+ for (std::set<ConstantInt*, ConstantIntOrdering>::iterator I =
+ PTIHandled.begin(),
+ E = PTIHandled.end(); I != E; ++I) {
+ PredCases.push_back(std::make_pair(*I, BBDefault));
+ NewSuccessors.push_back(BBDefault);
+ }
+ }
+
+ // Okay, at this point, we know which new successor Pred will get. Make
+ // sure we update the number of entries in the PHI nodes for these
+ // successors.
+ for (unsigned i = 0, e = NewSuccessors.size(); i != e; ++i)
+ AddPredecessorToBlock(NewSuccessors[i], Pred, BB);
+
+ Builder.SetInsertPoint(PTI);
+ // Convert pointer to int before we switch.
+ if (CV->getType()->isPointerTy()) {
+ assert(TD && "Cannot switch on pointer without TargetData");
+ CV = Builder.CreatePtrToInt(CV, TD->getIntPtrType(CV->getContext()),
+ "magicptr");
+ }
+
+ // Now that the successors are updated, create the new Switch instruction.
+ SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault,
+ PredCases.size());
+ NewSI->setDebugLoc(PTI->getDebugLoc());
+ for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+ NewSI->addCase(PredCases[i].first, PredCases[i].second);
+
+ EraseTerminatorInstAndDCECond(PTI);
+
+ // Okay, last check. If BB is still a successor of the new switch, then
+ // we must have an infinite loop case. If so, add an infinitely looping
+ // block to handle the case to preserve the behavior of the code.
+ BasicBlock *InfLoopBlock = 0;
+ for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
+ if (NewSI->getSuccessor(i) == BB) {
+ if (InfLoopBlock == 0) {
+ // Insert it at the end of the function, because it's either code,
+ // or it won't matter if it's hot. :)
+ InfLoopBlock = BasicBlock::Create(BB->getContext(),
+ "infloop", BB->getParent());
+ BranchInst::Create(InfLoopBlock, InfLoopBlock);
+ }
+ NewSI->setSuccessor(i, InfLoopBlock);
+ }
+
+ Changed = true;
+ }
+ }
+ return Changed;
+}
+
+// isSafeToHoistInvoke - If we would need to insert a select that uses the
+// value of this invoke (comments in HoistThenElseCodeToIf explain why we
+// would need to do this), we can't hoist the invoke, as there is nowhere
+// to put the select in this case.
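+//
+// A hypothetical sketch of the problem (invented IR, not from a test case):
+// BB1: %v1 = invoke i32 @f() to label %Merge unwind label %LP
+// BB2: %v2 = invoke i32 @f() to label %Merge unwind label %LP
+// Merge: %p = phi i32 [ %v1, %BB1 ], [ %x, %BB2 ]
+// The PHI entries disagree and one of them is an invoke's own result, so a
+// select would have to be inserted in front of the hoisted invoke, where
+// %v1 does not yet exist.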
+static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, + Instruction *I1, Instruction *I2) { + for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI) { + PHINode *PN; + for (BasicBlock::iterator BBI = SI->begin(); + (PN = dyn_cast<PHINode>(BBI)); ++BBI) { + Value *BB1V = PN->getIncomingValueForBlock(BB1); + Value *BB2V = PN->getIncomingValueForBlock(BB2); + if (BB1V != BB2V && (BB1V==I1 || BB2V==I2)) { + return false; + } + } + } + return true; +} + +/// HoistThenElseCodeToIf - Given a conditional branch that goes to BB1 and +/// BB2, hoist any common code in the two blocks up into the branch block. The +/// caller of this function guarantees that BI's block dominates BB1 and BB2. +static bool HoistThenElseCodeToIf(BranchInst *BI) { + // This does very trivial matching, with limited scanning, to find identical + // instructions in the two blocks. In particular, we don't want to get into + // O(M*N) situations here where M and N are the sizes of BB1 and BB2. As + // such, we currently just scan for obviously identical instructions in an + // identical order. + BasicBlock *BB1 = BI->getSuccessor(0); // The true destination. + BasicBlock *BB2 = BI->getSuccessor(1); // The false destination + + BasicBlock::iterator BB1_Itr = BB1->begin(); + BasicBlock::iterator BB2_Itr = BB2->begin(); + + Instruction *I1 = BB1_Itr++, *I2 = BB2_Itr++; + // Skip debug info if it is not identical. + DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1); + DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2); + if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) { + while (isa<DbgInfoIntrinsic>(I1)) + I1 = BB1_Itr++; + while (isa<DbgInfoIntrinsic>(I2)) + I2 = BB2_Itr++; + } + if (isa<PHINode>(I1) || !I1->isIdenticalToWhenDefined(I2) || + (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))) + return false; + + // If we get here, we can hoist at least one instruction. + BasicBlock *BIParent = BI->getParent(); + + do { + // If we are hoisting the terminator instruction, don't move one (making a + // broken BB), instead clone it, and remove BI. + if (isa<TerminatorInst>(I1)) + goto HoistTerminator; + + // For a normal instruction, we just move one to right before the branch, + // then replace all uses of the other with the first. Finally, we remove + // the now redundant second instruction. + BIParent->getInstList().splice(BI, BB1->getInstList(), I1); + if (!I2->use_empty()) + I2->replaceAllUsesWith(I1); + I1->intersectOptionalDataWith(I2); + I2->eraseFromParent(); + + I1 = BB1_Itr++; + I2 = BB2_Itr++; + // Skip debug info if it is not identical. + DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1); + DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2); + if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) { + while (isa<DbgInfoIntrinsic>(I1)) + I1 = BB1_Itr++; + while (isa<DbgInfoIntrinsic>(I2)) + I2 = BB2_Itr++; + } + } while (I1->isIdenticalToWhenDefined(I2)); + + return true; + +HoistTerminator: + // It may not be possible to hoist an invoke. + if (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)) + return true; + + // Okay, it is safe to hoist the terminator. + Instruction *NT = I1->clone(); + BIParent->getInstList().insert(BI, NT); + if (!NT->getType()->isVoidTy()) { + I1->replaceAllUsesWith(NT); + I2->replaceAllUsesWith(NT); + NT->takeName(I1); + } + + IRBuilder<true, NoFolder> Builder(NT); + // Hoisting one of the terminators from our successor is a great thing. 
+ // Unfortunately, the successors of the if/else blocks may have PHI nodes in
+ // them. If they do, all PHI entries for BB1/BB2 must agree for all PHI
+ // nodes, so we insert a select instruction to compute the final result.
+ std::map<std::pair<Value*,Value*>, SelectInst*> InsertedSelects;
+ for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI) {
+ PHINode *PN;
+ for (BasicBlock::iterator BBI = SI->begin();
+ (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
+ Value *BB1V = PN->getIncomingValueForBlock(BB1);
+ Value *BB2V = PN->getIncomingValueForBlock(BB2);
+ if (BB1V == BB2V) continue;
+
+ // These values do not agree. Insert a select instruction before NT
+ // that determines the right value.
+ SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
+ if (SI == 0)
+ SI = cast<SelectInst>
+ (Builder.CreateSelect(BI->getCondition(), BB1V, BB2V,
+ BB1V->getName()+"."+BB2V->getName()));
+
+ // Make the PHI node use the select for all incoming values for BB1/BB2
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingBlock(i) == BB1 || PN->getIncomingBlock(i) == BB2)
+ PN->setIncomingValue(i, SI);
+ }
+ }
+
+ // Update any PHI nodes in our new successors.
+ for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI)
+ AddPredecessorToBlock(*SI, BIParent, BB1);
+
+ EraseTerminatorInstAndDCECond(BI);
+ return true;
+}
+
+/// SpeculativelyExecuteBB - Given a conditional branch that goes to BB1
+/// and BB2, where the only successor of BB1 is BB2, hoist simple code
+/// (for now, restricted to a single instruction that's side-effect free) from
+/// BB1 into the branch block to speculatively execute it.
+static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) {
+ // Only speculatively execute a single instruction (not counting the
+ // terminator) for now.
+ Instruction *HInst = NULL;
+ Instruction *Term = BB1->getTerminator();
+ for (BasicBlock::iterator BBI = BB1->begin(), BBE = BB1->end();
+ BBI != BBE; ++BBI) {
+ Instruction *I = BBI;
+ // Skip debug info.
+ if (isa<DbgInfoIntrinsic>(I)) continue;
+ if (I == Term) break;
+
+ if (HInst)
+ return false;
+ HInst = I;
+ }
+ if (!HInst)
+ return false;
+
+ // Be conservative for now. FP select instructions can often be expensive.
+ Value *BrCond = BI->getCondition();
+ if (isa<FCmpInst>(BrCond))
+ return false;
+
+ // If BB1 is actually on the false edge of the conditional branch, remember
+ // to swap the select operands later.
+ bool Invert = false;
+ if (BB1 != BI->getSuccessor(0)) {
+ assert(BB1 == BI->getSuccessor(1) && "No edge from 'if' block?");
+ Invert = true;
+ }
+
+ // Turn
+ // BB:
+ // %t1 = icmp
+ // br i1 %t1, label %BB1, label %BB2
+ // BB1:
+ // %t3 = add %t2, c
+ // br label %BB2
+ // BB2:
+ // =>
+ // BB:
+ // %t1 = icmp
+ // %t4 = add %t2, c
+ // %t3 = select i1 %t1, %t4, %t2
+ switch (HInst->getOpcode()) {
+ default: return false; // Not safe / profitable to hoist.
+ case Instruction::Add:
+ case Instruction::Sub:
+ // Not worth doing for vector ops.
+ if (HInst->getType()->isVectorTy())
+ return false;
+ break;
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ // Don't mess with vector operations.
+ if (HInst->getType()->isVectorTy())
+ return false;
+ break; // These are all cheap and non-trapping instructions.
+ }
+
+ // If the instruction is obviously dead, don't try to predicate it.
+ if (HInst->use_empty()) {
+ HInst->eraseFromParent();
+ return true;
+ }
+
+ // Can we speculatively execute the instruction? And what is the value
+ // if the condition is false? Consider the PHI uses: if the incoming values
+ // from the "if" block are all the same V, then V is the value of the
+ // select if the condition is false.
+ BasicBlock *BIParent = BI->getParent();
+ SmallVector<PHINode*, 4> PHIUses;
+ Value *FalseV = NULL;
+
+ BasicBlock *BB2 = BB1->getTerminator()->getSuccessor(0);
+ for (Value::use_iterator UI = HInst->use_begin(), E = HInst->use_end();
+ UI != E; ++UI) {
+ // Reject any user that is not a PHI node in BB2. These can only occur in
+ // unreachable blocks, because they would not be dominated by the instr.
+ PHINode *PN = dyn_cast<PHINode>(*UI);
+ if (!PN || PN->getParent() != BB2)
+ return false;
+ PHIUses.push_back(PN);
+
+ Value *PHIV = PN->getIncomingValueForBlock(BIParent);
+ if (!FalseV)
+ FalseV = PHIV;
+ else if (FalseV != PHIV)
+ return false; // Inconsistent value when condition is false.
+ }
+
+ assert(FalseV && "Must have at least one user, and it must be a PHI");
+
+ // Do not hoist the instruction if any of its operands are defined but not
+ // used in this BB. The transformation will prevent the operand from
+ // being sunk into the use block.
+ for (User::op_iterator i = HInst->op_begin(), e = HInst->op_end();
+ i != e; ++i) {
+ Instruction *OpI = dyn_cast<Instruction>(*i);
+ if (OpI && OpI->getParent() == BIParent &&
+ !OpI->isUsedInBasicBlock(BIParent))
+ return false;
+ }
+
+ // If we get here, we can hoist the instruction. Try to place it
+ // before the icmp instruction preceding the conditional branch.
+ BasicBlock::iterator InsertPos = BI;
+ if (InsertPos != BIParent->begin())
+ --InsertPos;
+ // Skip debug info between condition and branch.
+ while (InsertPos != BIParent->begin() && isa<DbgInfoIntrinsic>(InsertPos))
+ --InsertPos;
+ if (InsertPos == BrCond && !isa<PHINode>(BrCond)) {
+ SmallPtrSet<Instruction *, 4> BB1Insns;
+ for (BasicBlock::iterator BB1I = BB1->begin(), BB1E = BB1->end();
+ BB1I != BB1E; ++BB1I)
+ BB1Insns.insert(BB1I);
+ for (Value::use_iterator UI = BrCond->use_begin(), UE = BrCond->use_end();
+ UI != UE; ++UI) {
+ Instruction *Use = cast<Instruction>(*UI);
+ if (!BB1Insns.count(Use)) continue;
+
+ // The instruction being hoisted uses BrCond, so it cannot be placed
+ // above BrCond's definition; place it just before the branch
+ // instruction instead.
+ InsertPos = BI;
+ break;
+ }
+ } else
+ InsertPos = BI;
+ BIParent->getInstList().splice(InsertPos, BB1->getInstList(), HInst);
+
+ // Create a select whose true value is the speculatively executed value and
+ // false value is the previously determined FalseV.
+ IRBuilder<true, NoFolder> Builder(BI);
+ SelectInst *SI;
+ if (Invert)
+ SI = cast<SelectInst>
+ (Builder.CreateSelect(BrCond, FalseV, HInst,
+ FalseV->getName() + "." + HInst->getName()));
+ else
+ SI = cast<SelectInst>
+ (Builder.CreateSelect(BrCond, HInst, FalseV,
+ HInst->getName() + "." + FalseV->getName()));
+
+ // Make the PHI node use the select for all incoming values for "then" and
+ // "if" blocks.
+ for (unsigned i = 0, e = PHIUses.size(); i != e; ++i) {
+ PHINode *PN = PHIUses[i];
+ for (unsigned j = 0, ee = PN->getNumIncomingValues(); j != ee; ++j)
+ if (PN->getIncomingBlock(j) == BB1 || PN->getIncomingBlock(j) == BIParent)
+ PN->setIncomingValue(j, SI);
+ }
+
+ ++NumSpeculations;
+ return true;
+}
+
+/// BlockIsSimpleEnoughToThreadThrough - Return true if we can thread a branch
+/// across this block.
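+///
+/// A hypothetical example of a block that can be threaded through (names
+/// invented for illustration, not from a test case):
+/// BB: ; preds = %P1, %P2
+/// %cond = phi i1 [ true, %P1 ], [ %x, %P2 ]
+/// br i1 %cond, label %T, label %F
+/// The block is tiny and defines no value that is used outside of it, so each
+/// predecessor edge can be redirected past it by cloning the block's body.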
+static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { + BranchInst *BI = cast<BranchInst>(BB->getTerminator()); + unsigned Size = 0; + + for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) { + if (isa<DbgInfoIntrinsic>(BBI)) + continue; + if (Size > 10) return false; // Don't clone large BB's. + ++Size; + + // We can only support instructions that do not define values that are + // live outside of the current basic block. + for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end(); + UI != E; ++UI) { + Instruction *U = cast<Instruction>(*UI); + if (U->getParent() != BB || isa<PHINode>(U)) return false; + } + + // Looks ok, continue checking. + } + + return true; +} + +/// FoldCondBranchOnPHI - If we have a conditional branch on a PHI node value +/// that is defined in the same block as the branch and if any PHI entries are +/// constants, thread edges corresponding to that entry to be branches to their +/// ultimate destination. +static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) { + BasicBlock *BB = BI->getParent(); + PHINode *PN = dyn_cast<PHINode>(BI->getCondition()); + // NOTE: we currently cannot transform this case if the PHI node is used + // outside of the block. + if (!PN || PN->getParent() != BB || !PN->hasOneUse()) + return false; + + // Degenerate case of a single entry PHI. + if (PN->getNumIncomingValues() == 1) { + FoldSingleEntryPHINodes(PN->getParent()); + return true; + } + + // Now we know that this block has multiple preds and two succs. + if (!BlockIsSimpleEnoughToThreadThrough(BB)) return false; + + // Okay, this is a simple enough basic block. See if any phi values are + // constants. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + ConstantInt *CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i)); + if (CB == 0 || !CB->getType()->isIntegerTy(1)) continue; + + // Okay, we now know that all edges from PredBB should be revectored to + // branch to RealDest. + BasicBlock *PredBB = PN->getIncomingBlock(i); + BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue()); + + if (RealDest == BB) continue; // Skip self loops. + // Skip if the predecessor's terminator is an indirect branch. + if (isa<IndirectBrInst>(PredBB->getTerminator())) continue; + + // The dest block might have PHI nodes, other predecessors and other + // difficult cases. Instead of being smart about this, just insert a new + // block that jumps to the destination block, effectively splitting + // the edge we are about to create. + BasicBlock *EdgeBB = BasicBlock::Create(BB->getContext(), + RealDest->getName()+".critedge", + RealDest->getParent(), RealDest); + BranchInst::Create(RealDest, EdgeBB); + + // Update PHI nodes. + AddPredecessorToBlock(RealDest, EdgeBB, BB); + + // BB may have instructions that are being threaded over. Clone these + // instructions into EdgeBB. We know that there will be no uses of the + // cloned instructions outside of EdgeBB. + BasicBlock::iterator InsertPt = EdgeBB->begin(); + DenseMap<Value*, Value*> TranslateMap; // Track translated values. + for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) { + if (PHINode *PN = dyn_cast<PHINode>(BBI)) { + TranslateMap[PN] = PN->getIncomingValueForBlock(PredBB); + continue; + } + // Clone the instruction. + Instruction *N = BBI->clone(); + if (BBI->hasName()) N->setName(BBI->getName()+".c"); + + // Update operands due to translation. 
+ for (User::op_iterator i = N->op_begin(), e = N->op_end();
+ i != e; ++i) {
+ DenseMap<Value*, Value*>::iterator PI = TranslateMap.find(*i);
+ if (PI != TranslateMap.end())
+ *i = PI->second;
+ }
+
+ // Check for trivial simplification.
+ if (Value *V = SimplifyInstruction(N, TD)) {
+ TranslateMap[BBI] = V;
+ delete N; // Instruction folded away, don't need actual inst
+ } else {
+ // Insert the new instruction into its new home.
+ EdgeBB->getInstList().insert(InsertPt, N);
+ if (!BBI->use_empty())
+ TranslateMap[BBI] = N;
+ }
+ }
+
+ // Loop over all of the edges from PredBB to BB, changing them to branch
+ // to EdgeBB instead.
+ TerminatorInst *PredBBTI = PredBB->getTerminator();
+ for (unsigned i = 0, e = PredBBTI->getNumSuccessors(); i != e; ++i)
+ if (PredBBTI->getSuccessor(i) == BB) {
+ BB->removePredecessor(PredBB);
+ PredBBTI->setSuccessor(i, EdgeBB);
+ }
+
+ // Recurse, simplifying any other constants.
+ return FoldCondBranchOnPHI(BI, TD) | true;
+ }
+
+ return false;
+}
+
+/// FoldTwoEntryPHINode - Given a BB that starts with the specified two-entry
+/// PHI node, see if we can eliminate it.
+static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) {
+ // Ok, this is a two entry PHI node. Check to see if this is a simple "if
+ // statement", which has a very simple dominance structure. Basically, we
+ // are trying to find the condition that is being branched on, which
+ // subsequently causes this merge to happen. We really want control
+ // dependence information for this check, but simplifycfg can't keep it up
+ // to date, and this catches most of the cases we care about anyway.
+ BasicBlock *BB = PN->getParent();
+ BasicBlock *IfTrue, *IfFalse;
+ Value *IfCond = GetIfCondition(BB, IfTrue, IfFalse);
+ if (!IfCond ||
+ // Don't bother if the branch will be constant folded trivially.
+ isa<ConstantInt>(IfCond))
+ return false;
+
+ // Okay, we found that we can merge this two-entry phi node into a select.
+ // Doing so would require us to fold *all* two entry phi nodes in this block.
+ // At some point this becomes non-profitable (particularly if the target
+ // doesn't support cmov's). Only do this transformation if there are two or
+ // fewer PHI nodes in this block.
+ unsigned NumPhis = 0;
+ for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
+ if (NumPhis > 2)
+ return false;
+
+ // Loop over the PHI's seeing if we can promote them all to select
+ // instructions. While we are at it, keep track of the instructions
+ // that need to be moved to the dominating block.
+ SmallPtrSet<Instruction*, 4> AggressiveInsts;
+ unsigned MaxCostVal0 = PHINodeFoldingThreshold,
+ MaxCostVal1 = PHINodeFoldingThreshold;
+
+ for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
+ PHINode *PN = cast<PHINode>(II++);
+ if (Value *V = SimplifyInstruction(PN, TD)) {
+ PN->replaceAllUsesWith(V);
+ PN->eraseFromParent();
+ continue;
+ }
+
+ if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts,
+ MaxCostVal0) ||
+ !DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts,
+ MaxCostVal1))
+ return false;
+ }
+
+ // If we folded the first phi, PN dangles at this point. Refresh it. If
+ // we ran out of PHIs then we simplified them all.
+ PN = dyn_cast<PHINode>(BB->begin());
+ if (PN == 0) return true;
+
+ // Don't fold i1 branches on PHIs which contain binary operators. These can
+ // often be turned into switches and other things.
+ if (PN->getType()->isIntegerTy(1) &&
+ (isa<BinaryOperator>(PN->getIncomingValue(0)) ||
+ isa<BinaryOperator>(PN->getIncomingValue(1)) ||
+ isa<BinaryOperator>(IfCond)))
+ return false;
+
+ // If all PHI nodes are promotable, check to make sure that all
+ // instructions in the predecessor blocks can be promoted as well. If
+ // not, we won't be able to get rid of the control flow, so it's not
+ // worth promoting to select instructions.
+ BasicBlock *DomBlock = 0;
+ BasicBlock *IfBlock1 = PN->getIncomingBlock(0);
+ BasicBlock *IfBlock2 = PN->getIncomingBlock(1);
+ if (cast<BranchInst>(IfBlock1->getTerminator())->isConditional()) {
+ IfBlock1 = 0;
+ } else {
+ DomBlock = *pred_begin(IfBlock1);
+ for (BasicBlock::iterator I = IfBlock1->begin(); !isa<TerminatorInst>(I); ++I)
+ if (!AggressiveInsts.count(I) && !isa<DbgInfoIntrinsic>(I)) {
+ // This is not an aggressive instruction that we can promote.
+ // Because of this, we won't be able to get rid of the control
+ // flow, so the xform is not worth it.
+ return false;
+ }
+ }
+
+ if (cast<BranchInst>(IfBlock2->getTerminator())->isConditional()) {
+ IfBlock2 = 0;
+ } else {
+ DomBlock = *pred_begin(IfBlock2);
+ for (BasicBlock::iterator I = IfBlock2->begin(); !isa<TerminatorInst>(I); ++I)
+ if (!AggressiveInsts.count(I) && !isa<DbgInfoIntrinsic>(I)) {
+ // This is not an aggressive instruction that we can promote.
+ // Because of this, we won't be able to get rid of the control
+ // flow, so the xform is not worth it.
+ return false;
+ }
+ }
+
+ DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond << " T: "
+ << IfTrue->getName() << " F: " << IfFalse->getName() << "\n");
+
+ // If we can still promote the PHI nodes after this gauntlet of tests,
+ // do all of the PHI's now.
+ Instruction *InsertPt = DomBlock->getTerminator();
+ IRBuilder<true, NoFolder> Builder(InsertPt);
+
+ // Move all 'aggressive' instructions, which are defined in the
+ // conditional parts of the if's, up to the dominating block.
+ if (IfBlock1)
+ DomBlock->getInstList().splice(InsertPt,
+ IfBlock1->getInstList(), IfBlock1->begin(),
+ IfBlock1->getTerminator());
+ if (IfBlock2)
+ DomBlock->getInstList().splice(InsertPt,
+ IfBlock2->getInstList(), IfBlock2->begin(),
+ IfBlock2->getTerminator());
+
+ while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
+ // Change the PHI node into a select instruction.
+ Value *TrueVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse);
+ Value *FalseVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue);
+
+ SelectInst *NV =
+ cast<SelectInst>(Builder.CreateSelect(IfCond, TrueVal, FalseVal, ""));
+ PN->replaceAllUsesWith(NV);
+ NV->takeName(PN);
+ PN->eraseFromParent();
+ }
+
+ // At this point, IfBlock1 and IfBlock2 are both empty, so our if statement
+ // has been flattened. Change DomBlock to jump directly to our new block to
+ // avoid other simplifycfg's kicking in on the diamond.
+ TerminatorInst *OldTI = DomBlock->getTerminator();
+ Builder.SetInsertPoint(OldTI);
+ Builder.CreateBr(BB);
+ OldTI->eraseFromParent();
+ return true;
+}
+
+/// SimplifyCondBranchToTwoReturns - If we found a conditional branch that goes
+/// to two returning blocks, try to merge them together into one return,
+/// introducing a select if the return values disagree.
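+///
+/// For example (a hypothetical sketch, not from a test case):
+/// br i1 %c, label %T, label %F
+/// T: ret i32 1
+/// F: ret i32 0
+/// becomes
+/// %retval = select i1 %c, i32 1, i32 0
+/// ret i32 %retval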
+static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,
+ IRBuilder<> &Builder) {
+ assert(BI->isConditional() && "Must be a conditional branch");
+ BasicBlock *TrueSucc = BI->getSuccessor(0);
+ BasicBlock *FalseSucc = BI->getSuccessor(1);
+ ReturnInst *TrueRet = cast<ReturnInst>(TrueSucc->getTerminator());
+ ReturnInst *FalseRet = cast<ReturnInst>(FalseSucc->getTerminator());
+
+ // Check to ensure both blocks are empty (just a return), possibly with PHI
+ // nodes. If there are other instructions, merging would cause extra
+ // computation on one path or the other.
+ if (!TrueSucc->getFirstNonPHIOrDbg()->isTerminator())
+ return false;
+ if (!FalseSucc->getFirstNonPHIOrDbg()->isTerminator())
+ return false;
+
+ Builder.SetInsertPoint(BI);
+ // Okay, we found a branch that is going to two return nodes. If
+ // there is no return value for this function, just change the
+ // branch into a return.
+ if (FalseRet->getNumOperands() == 0) {
+ TrueSucc->removePredecessor(BI->getParent());
+ FalseSucc->removePredecessor(BI->getParent());
+ Builder.CreateRetVoid();
+ EraseTerminatorInstAndDCECond(BI);
+ return true;
+ }
+
+ // Otherwise, figure out what the true and false return values are
+ // so we can insert a new select instruction.
+ Value *TrueValue = TrueRet->getReturnValue();
+ Value *FalseValue = FalseRet->getReturnValue();
+
+ // Unwrap any PHI nodes in the return blocks.
+ if (PHINode *TVPN = dyn_cast_or_null<PHINode>(TrueValue))
+ if (TVPN->getParent() == TrueSucc)
+ TrueValue = TVPN->getIncomingValueForBlock(BI->getParent());
+ if (PHINode *FVPN = dyn_cast_or_null<PHINode>(FalseValue))
+ if (FVPN->getParent() == FalseSucc)
+ FalseValue = FVPN->getIncomingValueForBlock(BI->getParent());
+
+ // In order for this transformation to be safe, we must be able to
+ // unconditionally execute both operands to the return. This is
+ // normally the case, but we could have a potentially-trapping
+ // constant expression that prevents this transformation from being
+ // safe.
+ if (ConstantExpr *TCV = dyn_cast_or_null<ConstantExpr>(TrueValue))
+ if (TCV->canTrap())
+ return false;
+ if (ConstantExpr *FCV = dyn_cast_or_null<ConstantExpr>(FalseValue))
+ if (FCV->canTrap())
+ return false;
+
+ // Okay, we collected all the mapped values and checked them for sanity, and
+ // decided to really do this transformation. First, update the CFG.
+ TrueSucc->removePredecessor(BI->getParent());
+ FalseSucc->removePredecessor(BI->getParent());
+
+ // Insert select instructions where needed.
+ Value *BrCond = BI->getCondition();
+ if (TrueValue) {
+ // Insert a select if the results differ.
+ if (TrueValue == FalseValue || isa<UndefValue>(FalseValue)) {
+ } else if (isa<UndefValue>(TrueValue)) {
+ TrueValue = FalseValue;
+ } else {
+ TrueValue = Builder.CreateSelect(BrCond, TrueValue,
+ FalseValue, "retval");
+ }
+ }
+
+ Value *RI = !TrueValue ?
+ Builder.CreateRetVoid() : Builder.CreateRet(TrueValue);
+
+ (void) RI;
+
+ DEBUG(dbgs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:"
+ << "\n " << *BI << "NewRet = " << *RI
+ << "TRUEBLOCK: " << *TrueSucc << "FALSEBLOCK: "<< *FalseSucc);
+
+ EraseTerminatorInstAndDCECond(BI);
+
+ return true;
+}
+
+/// FoldBranchToCommonDest - If this basic block is simple enough, and if a
+/// predecessor branches to us and one of our successors, fold the block into
+/// the predecessor and use logical operations to pick the right destination.
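+///
+/// For example (hypothetical IR; the names are invented for illustration):
+/// PredBB:
+/// br i1 %a, label %T, label %BB
+/// BB:
+/// %b = icmp eq i32 %x, 0
+/// br i1 %b, label %T, label %F
+/// becomes (cloning %b into PredBB)
+/// PredBB:
+/// %b = icmp eq i32 %x, 0
+/// %or.cond = or i1 %a, %b
+/// br i1 %or.cond, label %T, label %F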
+bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
+ BasicBlock *BB = BI->getParent();
+
+ Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
+ if (Cond == 0 || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) ||
+ Cond->getParent() != BB || !Cond->hasOneUse())
+ return false;
+
+ // Only allow this if the condition is a simple instruction that can be
+ // executed unconditionally. It must be in the same block as the branch, and
+ // must be at the front of the block.
+ BasicBlock::iterator FrontIt = BB->front();
+
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(FrontIt)) ++FrontIt;
+
+ // Allow a single instruction to be hoisted in addition to the compare
+ // that feeds the branch. We later ensure that any values that _it_ uses
+ // were also live in the predecessor, so that we don't unnecessarily create
+ // register pressure or inhibit out-of-order execution.
+ Instruction *BonusInst = 0;
+ if (&*FrontIt != Cond &&
+ FrontIt->hasOneUse() && *FrontIt->use_begin() == Cond &&
+ FrontIt->isSafeToSpeculativelyExecute()) {
+ BonusInst = &*FrontIt;
+ ++FrontIt;
+
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(FrontIt)) ++FrontIt;
+ }
+
+ // Only a single bonus inst is allowed.
+ if (&*FrontIt != Cond)
+ return false;
+
+ // Make sure the instruction after the condition is the cond branch.
+ BasicBlock::iterator CondIt = Cond; ++CondIt;
+
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(CondIt)) ++CondIt;
+
+ if (&*CondIt != BI)
+ return false;
+
+ // Cond is known to be a compare or binary operator. Check to make sure that
+ // neither operand is a potentially-trapping constant expression.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(0)))
+ if (CE->canTrap())
+ return false;
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(1)))
+ if (CE->canTrap())
+ return false;
+
+ // Finally, don't infinitely unroll conditional loops.
+ BasicBlock *TrueDest = BI->getSuccessor(0);
+ BasicBlock *FalseDest = BI->getSuccessor(1);
+ if (TrueDest == BB || FalseDest == BB)
+ return false;
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *PredBlock = *PI;
+ BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
+
+ // Check that we have two conditional branches. If there is a PHI node in
+ // the common successor, verify that the same value flows in from both
+ // blocks.
+ if (PBI == 0 || PBI->isUnconditional() || !SafeToMergeTerminators(BI, PBI))
+ continue;
+
+ // Determine if the two branches share a common destination.
+ Instruction::BinaryOps Opc;
+ bool InvertPredCond = false;
+
+ if (PBI->getSuccessor(0) == TrueDest)
+ Opc = Instruction::Or;
+ else if (PBI->getSuccessor(1) == FalseDest)
+ Opc = Instruction::And;
+ else if (PBI->getSuccessor(0) == FalseDest)
+ Opc = Instruction::And, InvertPredCond = true;
+ else if (PBI->getSuccessor(1) == TrueDest)
+ Opc = Instruction::Or, InvertPredCond = true;
+ else
+ continue;
+
+ // Ensure that any values used in the bonus instruction are also used
+ // by the terminator of the predecessor. This means that those values
+ // must already have been resolved, so we won't be inhibiting the
+ // out-of-order core by speculating them earlier.
+ if (BonusInst) { + // Collect the values used by the bonus inst + SmallPtrSet<Value*, 4> UsedValues; + for (Instruction::op_iterator OI = BonusInst->op_begin(), + OE = BonusInst->op_end(); OI != OE; ++OI) { + Value* V = *OI; + if (!isa<Constant>(V)) + UsedValues.insert(V); + } + + SmallVector<std::pair<Value*, unsigned>, 4> Worklist; + Worklist.push_back(std::make_pair(PBI->getOperand(0), 0)); + + // Walk up to four levels back up the use-def chain of the predecessor's + // terminator to see if all those values were used. The choice of four + // levels is arbitrary, to provide a compile-time-cost bound. + while (!Worklist.empty()) { + std::pair<Value*, unsigned> Pair = Worklist.back(); + Worklist.pop_back(); + + if (Pair.second >= 4) continue; + UsedValues.erase(Pair.first); + if (UsedValues.empty()) break; + + if (Instruction *I = dyn_cast<Instruction>(Pair.first)) { + for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end(); + OI != OE; ++OI) + Worklist.push_back(std::make_pair(OI->get(), Pair.second+1)); + } + } + + if (!UsedValues.empty()) return false; + } + + DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB); + IRBuilder<> Builder(PBI); + + // If we need to invert the condition in the pred block to match, do so now. + if (InvertPredCond) { + Value *NewCond = PBI->getCondition(); + + if (NewCond->hasOneUse() && isa<CmpInst>(NewCond)) { + CmpInst *CI = cast<CmpInst>(NewCond); + CI->setPredicate(CI->getInversePredicate()); + } else { + NewCond = Builder.CreateNot(NewCond, + PBI->getCondition()->getName()+".not"); + } + + PBI->setCondition(NewCond); + BasicBlock *OldTrue = PBI->getSuccessor(0); + BasicBlock *OldFalse = PBI->getSuccessor(1); + PBI->setSuccessor(0, OldFalse); + PBI->setSuccessor(1, OldTrue); + } + + // If we have a bonus inst, clone it into the predecessor block. + Instruction *NewBonus = 0; + if (BonusInst) { + NewBonus = BonusInst->clone(); + PredBlock->getInstList().insert(PBI, NewBonus); + NewBonus->takeName(BonusInst); + BonusInst->setName(BonusInst->getName()+".old"); + } + + // Clone Cond into the predecessor basic block, and or/and the + // two conditions together. + Instruction *New = Cond->clone(); + if (BonusInst) New->replaceUsesOfWith(BonusInst, NewBonus); + PredBlock->getInstList().insert(PBI, New); + New->takeName(Cond); + Cond->setName(New->getName()+".old"); + + Instruction *NewCond = + cast<Instruction>(Builder.CreateBinOp(Opc, PBI->getCondition(), + New, "or.cond")); + PBI->setCondition(NewCond); + if (PBI->getSuccessor(0) == BB) { + AddPredecessorToBlock(TrueDest, PredBlock, BB); + PBI->setSuccessor(0, TrueDest); + } + if (PBI->getSuccessor(1) == BB) { + AddPredecessorToBlock(FalseDest, PredBlock, BB); + PBI->setSuccessor(1, FalseDest); + } + + // Copy any debug value intrinsics into the end of PredBlock. + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + if (isa<DbgInfoIntrinsic>(*I)) + I->clone()->insertBefore(PBI); + + return true; + } + return false; +} + +/// SimplifyCondBranchToCondBranch - If we have a conditional branch as a +/// predecessor of another block, this function tries to simplify it. We know +/// that PBI and BI are both conditional branches, and BI is in one of the +/// successor blocks of PBI - PBI branches to BI. 
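+///
+/// For example (a hypothetical sketch; names invented for illustration):
+/// PredBB:
+/// br i1 %p, label %Common, label %BB
+/// BB: ; empty except for the branch
+/// br i1 %q, label %Common, label %Other
+/// can be folded into
+/// PredBB:
+/// %brmerge = or i1 %p, %q
+/// br i1 %brmerge, label %Common, label %Other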
+static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
+ assert(PBI->isConditional() && BI->isConditional());
+ BasicBlock *BB = BI->getParent();
+
+ // If this block ends with a branch instruction, and if there is a
+ // predecessor that ends on a branch of the same condition, make
+ // this conditional branch redundant.
+ if (PBI->getCondition() == BI->getCondition() &&
+ PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
+ // Okay, the outcome of this conditional branch is statically
+ // knowable. If this block had a single pred, handle specially.
+ if (BB->getSinglePredecessor()) {
+ // Turn this into a branch on constant.
+ bool CondIsTrue = PBI->getSuccessor(0) == BB;
+ BI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()),
+ CondIsTrue));
+ return true; // Nuke the branch on constant.
+ }
+
+ // Otherwise, if there are multiple predecessors, insert a PHI that merges
+ // in the constant and simplify the block result. Subsequent passes of
+ // simplifycfg will thread the block.
+ if (BlockIsSimpleEnoughToThreadThrough(BB)) {
+ pred_iterator PB = pred_begin(BB), PE = pred_end(BB);
+ PHINode *NewPN = PHINode::Create(Type::getInt1Ty(BB->getContext()),
+ std::distance(PB, PE),
+ BI->getCondition()->getName() + ".pr",
+ BB->begin());
+ // Okay, we're going to insert the PHI node. Since PBI is not the only
+ // predecessor, compute the PHI'd conditional value for all of the preds.
+ // Any predecessor where the condition is not computable we keep symbolic.
+ for (pred_iterator PI = PB; PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ if ((PBI = dyn_cast<BranchInst>(P->getTerminator())) &&
+ PBI != BI && PBI->isConditional() &&
+ PBI->getCondition() == BI->getCondition() &&
+ PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
+ bool CondIsTrue = PBI->getSuccessor(0) == BB;
+ NewPN->addIncoming(ConstantInt::get(Type::getInt1Ty(BB->getContext()),
+ CondIsTrue), P);
+ } else {
+ NewPN->addIncoming(BI->getCondition(), P);
+ }
+ }
+
+ BI->setCondition(NewPN);
+ return true;
+ }
+ }
+
+ // If this is a conditional branch in an empty block, and if any
+ // predecessor is a conditional branch to one of our destinations,
+ // fold the conditions into logical ops and one cond br.
+ BasicBlock::iterator BBI = BB->begin();
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(BBI))
+ ++BBI;
+ if (&*BBI != BI)
+ return false;
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BI->getCondition()))
+ if (CE->canTrap())
+ return false;
+
+ int PBIOp, BIOp;
+ if (PBI->getSuccessor(0) == BI->getSuccessor(0))
+ PBIOp = BIOp = 0;
+ else if (PBI->getSuccessor(0) == BI->getSuccessor(1))
+ PBIOp = 0, BIOp = 1;
+ else if (PBI->getSuccessor(1) == BI->getSuccessor(0))
+ PBIOp = 1, BIOp = 0;
+ else if (PBI->getSuccessor(1) == BI->getSuccessor(1))
+ PBIOp = BIOp = 1;
+ else
+ return false;
+
+ // Check to make sure that the other destination of this branch
+ // isn't BB itself. If so, this is an infinite loop that will
+ // keep getting unwound.
+ if (PBI->getSuccessor(PBIOp) == BB)
+ return false;
+
+ // Do not perform this transformation if it would require
+ // insertion of a large number of select instructions. For targets
+ // without predication/cmovs, this is a big pessimization.
+ BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
+
+ unsigned NumPhis = 0;
+ for (BasicBlock::iterator II = CommonDest->begin();
+ isa<PHINode>(II); ++II, ++NumPhis)
+ if (NumPhis > 2) // Disable this xform.
+ return false;
+
+ // Finally, if everything is ok, fold the branches to logical ops.
+ BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
+
+ DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
+ << "AND: " << *BI->getParent());
+
+ // If OtherDest *is* BB, then BB is a basic block with a single conditional
+ // branch in it, where one edge (OtherDest) goes back to itself but the other
+ // exits. We don't *know* that the program avoids the infinite loop
+ // (even though that seems likely). If we do this xform naively, we'll end up
+ // recursively unpeeling the loop. Since we know that (after the xform is
+ // done) the block *is* infinite if reached, we just make it an obviously
+ // infinite loop with no cond branch.
+ if (OtherDest == BB) {
+ // Insert it at the end of the function, because it's either code,
+ // or it won't matter if it's hot. :)
+ BasicBlock *InfLoopBlock = BasicBlock::Create(BB->getContext(),
+ "infloop", BB->getParent());
+ BranchInst::Create(InfLoopBlock, InfLoopBlock);
+ OtherDest = InfLoopBlock;
+ }
+
+ DEBUG(dbgs() << *PBI->getParent()->getParent());
+
+ // BI may have other predecessors. Because of this, we leave
+ // it alone, but modify PBI.
+
+ // Make sure we get to CommonDest on True&True directions.
+ Value *PBICond = PBI->getCondition();
+ IRBuilder<true, NoFolder> Builder(PBI);
+ if (PBIOp)
+ PBICond = Builder.CreateNot(PBICond, PBICond->getName()+".not");
+
+ Value *BICond = BI->getCondition();
+ if (BIOp)
+ BICond = Builder.CreateNot(BICond, BICond->getName()+".not");
+
+ // Merge the conditions.
+ Value *Cond = Builder.CreateOr(PBICond, BICond, "brmerge");
+
+ // Modify PBI to branch on the new condition to the new dests.
+ PBI->setCondition(Cond);
+ PBI->setSuccessor(0, CommonDest);
+ PBI->setSuccessor(1, OtherDest);
+
+ // OtherDest may have phi nodes. If so, add entries from PBI's
+ // block that are identical to the entries for BI's block.
+ AddPredecessorToBlock(OtherDest, PBI->getParent(), BB);
+
+ // We know that the CommonDest already had an edge from PBI to
+ // it. If it has PHIs though, the PHIs may have different
+ // entries for BB and PBI's BB. If so, insert a select to make
+ // them agree.
+ PHINode *PN;
+ for (BasicBlock::iterator II = CommonDest->begin();
+ (PN = dyn_cast<PHINode>(II)); ++II) {
+ Value *BIV = PN->getIncomingValueForBlock(BB);
+ unsigned PBBIdx = PN->getBasicBlockIndex(PBI->getParent());
+ Value *PBIV = PN->getIncomingValue(PBBIdx);
+ if (BIV != PBIV) {
+ // Insert a select in PBI to pick the right value.
+ Value *NV = cast<SelectInst>
+ (Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName()+".mux"));
+ PN->setIncomingValue(PBBIdx, NV);
+ }
+ }
+
+ DEBUG(dbgs() << "INTO: " << *PBI->getParent());
+ DEBUG(dbgs() << *PBI->getParent()->getParent());
+
+ // This basic block is probably dead. We know it has at least
+ // one fewer predecessor.
+ return true;
+}
+
+// SimplifyTerminatorOnSelect - Simplifies a terminator by replacing it with a
+// branch to TrueBB if Cond is true or to FalseBB if Cond is false.
+// Takes care of updating the successors and removing the old terminator.
+// Also makes sure not to introduce new successors by assuming that edges to
+// non-successor TrueBBs and FalseBBs aren't reachable.
+static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond,
+ BasicBlock *TrueBB, BasicBlock *FalseBB){
+ // Remove any superfluous successor edges from the CFG.
+ // First, figure out which successors to preserve.
+ // If TrueBB and FalseBB are equal, only try to preserve one copy of that
+ // successor.
+ BasicBlock *KeepEdge1 = TrueBB; + BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : 0; + + // Then remove the rest. + for (unsigned I = 0, E = OldTerm->getNumSuccessors(); I != E; ++I) { + BasicBlock *Succ = OldTerm->getSuccessor(I); + // Make sure only to keep exactly one copy of each edge. + if (Succ == KeepEdge1) + KeepEdge1 = 0; + else if (Succ == KeepEdge2) + KeepEdge2 = 0; + else + Succ->removePredecessor(OldTerm->getParent()); + } + + IRBuilder<> Builder(OldTerm); + Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc()); + + // Insert an appropriate new terminator. + if ((KeepEdge1 == 0) && (KeepEdge2 == 0)) { + if (TrueBB == FalseBB) + // We were only looking for one successor, and it was present. + // Create an unconditional branch to it. + Builder.CreateBr(TrueBB); + else + // We found both of the successors we were looking for. + // Create a conditional branch sharing the condition of the select. + Builder.CreateCondBr(Cond, TrueBB, FalseBB); + } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) { + // Neither of the selected blocks were successors, so this + // terminator must be unreachable. + new UnreachableInst(OldTerm->getContext(), OldTerm); + } else { + // One of the selected values was a successor, but the other wasn't. + // Insert an unconditional branch to the one that was found; + // the edge to the one that wasn't must be unreachable. + if (KeepEdge1 == 0) + // Only TrueBB was found. + Builder.CreateBr(TrueBB); + else + // Only FalseBB was found. + Builder.CreateBr(FalseBB); + } + + EraseTerminatorInstAndDCECond(OldTerm); + return true; +} + +// SimplifySwitchOnSelect - Replaces +// (switch (select cond, X, Y)) on constant X, Y +// with a branch - conditional if X and Y lead to distinct BBs, +// unconditional otherwise. +static bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select) { + // Check for constant integer values in the select. + ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue()); + ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue()); + if (!TrueVal || !FalseVal) + return false; + + // Find the relevant condition and destinations. + Value *Condition = Select->getCondition(); + BasicBlock *TrueBB = SI->getSuccessor(SI->findCaseValue(TrueVal)); + BasicBlock *FalseBB = SI->getSuccessor(SI->findCaseValue(FalseVal)); + + // Perform the actual simplification. + return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB); +} + +// SimplifyIndirectBrOnSelect - Replaces +// (indirectbr (select cond, blockaddress(@fn, BlockA), +// blockaddress(@fn, BlockB))) +// with +// (br cond, BlockA, BlockB). +static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) { + // Check that both operands of the select are block addresses. + BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue()); + BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue()); + if (!TBA || !FBA) + return false; + + // Extract the actual blocks. + BasicBlock *TrueBB = TBA->getBasicBlock(); + BasicBlock *FalseBB = FBA->getBasicBlock(); + + // Perform the actual simplification. + return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB); +} + +/// TryToSimplifyUncondBranchWithICmpInIt - This is called when we find an icmp +/// instruction (a seteq/setne with a constant) as the only instruction in a +/// block that ends with an uncond branch. We are looking for a very specific +/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. 
In +/// this case, we merge the first two "or's of icmp" into a switch, but then the +/// default value goes to an uncond block with a seteq in it, we get something +/// like: +/// +/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ] +/// DEFAULT: +/// %tmp = icmp eq i8 %A, 92 +/// br label %end +/// end: +/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ] +/// +/// We prefer to split the edge to 'end' so that there is a true/false entry to +/// the PHI, merging the third icmp into the switch. +static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, + const TargetData *TD, + IRBuilder<> &Builder) { + BasicBlock *BB = ICI->getParent(); + + // If the block has any PHIs in it or the icmp has multiple uses, it is too + // complex. + if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse()) return false; + + Value *V = ICI->getOperand(0); + ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1)); + + // The pattern we're looking for is where our only predecessor is a switch on + // 'V' and this block is the default case for the switch. In this case we can + // fold the compared value into the switch to simplify things. + BasicBlock *Pred = BB->getSinglePredecessor(); + if (Pred == 0 || !isa<SwitchInst>(Pred->getTerminator())) return false; + + SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator()); + if (SI->getCondition() != V) + return false; + + // If BB is reachable on a non-default case, then we simply know the value of + // V in this block. Substitute it and constant fold the icmp instruction + // away. + if (SI->getDefaultDest() != BB) { + ConstantInt *VVal = SI->findCaseDest(BB); + assert(VVal && "Should have a unique destination value"); + ICI->setOperand(0, VVal); + + if (Value *V = SimplifyInstruction(ICI, TD)) { + ICI->replaceAllUsesWith(V); + ICI->eraseFromParent(); + } + // BB is now empty, so it is likely to simplify away. + return SimplifyCFG(BB) | true; + } + + // Ok, the block is reachable from the default dest. If the constant we're + // comparing exists in one of the other edges, then we can constant fold ICI + // and zap it. + if (SI->findCaseValue(Cst) != 0) { + Value *V; + if (ICI->getPredicate() == ICmpInst::ICMP_EQ) + V = ConstantInt::getFalse(BB->getContext()); + else + V = ConstantInt::getTrue(BB->getContext()); + + ICI->replaceAllUsesWith(V); + ICI->eraseFromParent(); + // BB is now empty, so it is likely to simplify away. + return SimplifyCFG(BB) | true; + } + + // The use of the icmp has to be in the 'end' block, by the only PHI node in + // the block. + BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0); + PHINode *PHIUse = dyn_cast<PHINode>(ICI->use_back()); + if (PHIUse == 0 || PHIUse != &SuccBlock->front() || + isa<PHINode>(++BasicBlock::iterator(PHIUse))) + return false; + + // If the icmp is a SETEQ, then the default dest gets false, the new edge gets + // true in the PHI. + Constant *DefaultCst = ConstantInt::getTrue(BB->getContext()); + Constant *NewCst = ConstantInt::getFalse(BB->getContext()); + + if (ICI->getPredicate() == ICmpInst::ICMP_EQ) + std::swap(DefaultCst, NewCst); + + // Replace ICI (which is used by the PHI for the default value) with true or + // false depending on if it is EQ or NE. + ICI->replaceAllUsesWith(DefaultCst); + ICI->eraseFromParent(); + + // Okay, the switch goes to this block on a default value. Add an edge from + // the switch to the merge point on the compared value. 
+ BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "switch.edge",
+ BB->getParent(), BB);
+ SI->addCase(Cst, NewBB);
+
+ // NewBB branches to the phi block, add the uncond branch and the phi entry.
+ Builder.SetInsertPoint(NewBB);
+ Builder.SetCurrentDebugLocation(SI->getDebugLoc());
+ Builder.CreateBr(SuccBlock);
+ PHIUse->addIncoming(NewCst, NewBB);
+ return true;
+}
+
+/// SimplifyBranchOnICmpChain - The specified branch is a conditional branch.
+/// Check to see if it is branching on an or/and chain of icmp instructions, and
+/// fold it into a switch instruction if so.
+static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD,
+ IRBuilder<> &Builder) {
+ Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
+ if (Cond == 0) return false;
+
+ // Change br (X == 0 | X == 1), T, F into a switch instruction.
+ // If this is a bunch of seteq's or'd together, or if it's a bunch of
+ // 'setne's and'ed together, collect them.
+ Value *CompVal = 0;
+ std::vector<ConstantInt*> Values;
+ bool TrueWhenEqual = true;
+ Value *ExtraCase = 0;
+ unsigned UsedICmps = 0;
+
+ if (Cond->getOpcode() == Instruction::Or) {
+ CompVal = GatherConstantCompares(Cond, Values, ExtraCase, TD, true,
+ UsedICmps);
+ } else if (Cond->getOpcode() == Instruction::And) {
+ CompVal = GatherConstantCompares(Cond, Values, ExtraCase, TD, false,
+ UsedICmps);
+ TrueWhenEqual = false;
+ }
+
+ // If we didn't find a value compared against multiple constants, fail.
+ if (CompVal == 0) return false;
+
+ // Avoid turning single icmps into a switch.
+ if (UsedICmps <= 1)
+ return false;
+
+ // There might be duplicate constants in the list, which the switch
+ // instruction can't handle; remove them now.
+ array_pod_sort(Values.begin(), Values.end(), ConstantIntSortPredicate);
+ Values.erase(std::unique(Values.begin(), Values.end()), Values.end());
+
+ // If Extra was used, we require at least two switch values to do the
+ // transformation. A switch with one value is just a cond branch.
+ if (ExtraCase && Values.size() < 2) return false;
+
+ // Figure out which block is which destination.
+ BasicBlock *DefaultBB = BI->getSuccessor(1);
+ BasicBlock *EdgeBB = BI->getSuccessor(0);
+ if (!TrueWhenEqual) std::swap(DefaultBB, EdgeBB);
+
+ BasicBlock *BB = BI->getParent();
+
+ DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
+ << " cases into SWITCH. BB is:\n" << *BB);
+
+ // If there are any extra values that couldn't be folded into the switch
+ // then we evaluate them with an explicit branch first. Split the block
+ // right before the condbr to handle it.
+ if (ExtraCase) {
+ BasicBlock *NewBB = BB->splitBasicBlock(BI, "switch.early.test");
+ // Remove the uncond branch added to the old block.
+ TerminatorInst *OldTI = BB->getTerminator();
+ Builder.SetInsertPoint(OldTI);
+
+ if (TrueWhenEqual)
+ Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB);
+ else
+ Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
+
+ OldTI->eraseFromParent();
+
+ // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
+ // for the edge we just added.
+ AddPredecessorToBlock(EdgeBB, BB, NewBB);
+
+ DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase
+ << "\nEXTRABB = " << *BB);
+ BB = NewBB;
+ }
+
+ Builder.SetInsertPoint(BI);
+ // Convert pointer to int before we switch.
+ if (CompVal->getType()->isPointerTy()) {
+ assert(TD && "Cannot switch on pointer without TargetData");
+ CompVal = Builder.CreatePtrToInt(CompVal,
+ TD->getIntPtrType(CompVal->getContext()),
+ "magicptr");
+ }
+
+ // Create the new switch instruction now.
+ SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
+
+ // Add all of the 'cases' to the switch instruction.
+ for (unsigned i = 0, e = Values.size(); i != e; ++i)
+ New->addCase(Values[i], EdgeBB);
+
+ // We added edges from BB to EdgeBB. As such, if there were any
+ // PHI nodes in EdgeBB, they need entries to be added corresponding to
+ // the number of edges added.
+ for (BasicBlock::iterator BBI = EdgeBB->begin();
+ isa<PHINode>(BBI); ++BBI) {
+ PHINode *PN = cast<PHINode>(BBI);
+ Value *InVal = PN->getIncomingValueForBlock(BB);
+ for (unsigned i = 0, e = Values.size()-1; i != e; ++i)
+ PN->addIncoming(InVal, BB);
+ }
+
+ // Erase the old branch instruction.
+ EraseTerminatorInstAndDCECond(BI);
+
+ DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
+ return true;
+}
+
+bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
+ // If this is a trivial landing pad that just continues unwinding the caught
+ // exception, then zap the landing pad, turning its invokes into calls.
+ BasicBlock *BB = RI->getParent();
+ LandingPadInst *LPInst = dyn_cast<LandingPadInst>(BB->getFirstNonPHI());
+ if (RI->getValue() != LPInst)
+ // Not a landing pad, or the resume is not unwinding the exception that
+ // caused control to branch here.
+ return false;
+
+ // Check that there are no other instructions except for debug intrinsics.
+ BasicBlock::iterator I = LPInst, E = RI;
+ while (++I != E)
+ if (!isa<DbgInfoIntrinsic>(I))
+ return false;
+
+ // Turn all invokes that unwind here into calls and delete the basic block.
+ for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) {
+ InvokeInst *II = cast<InvokeInst>((*PI++)->getTerminator());
+ SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
+ // Insert a call instruction before the invoke.
+ CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II);
+ Call->takeName(II);
+ Call->setCallingConv(II->getCallingConv());
+ Call->setAttributes(II->getAttributes());
+ Call->setDebugLoc(II->getDebugLoc());
+
+ // Anything that used the value produced by the invoke instruction now uses
+ // the value produced by the call instruction. Note that we do this even
+ // for void functions and calls with no uses so that the callgraph edge is
+ // updated.
+ II->replaceAllUsesWith(Call);
+ BB->removePredecessor(II->getParent());
+
+ // Insert a branch to the normal destination right before the invoke.
+ BranchInst::Create(II->getNormalDest(), II);
+
+ // Finally, delete the invoke instruction!
+ II->eraseFromParent();
+ }
+
+ // The landingpad is now unreachable. Zap it.
+ BB->eraseFromParent();
+ return true;
+}
+
+bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
+ BasicBlock *BB = RI->getParent();
+ if (!BB->getFirstNonPHIOrDbg()->isTerminator()) return false;
+
+ // Find predecessors that end with branches.
+ SmallVector<BasicBlock*, 8> UncondBranchPreds; + SmallVector<BranchInst*, 8> CondBranchPreds; + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + BasicBlock *P = *PI; + TerminatorInst *PTI = P->getTerminator(); + if (BranchInst *BI = dyn_cast<BranchInst>(PTI)) { + if (BI->isUnconditional()) + UncondBranchPreds.push_back(P); + else + CondBranchPreds.push_back(BI); + } + } + + // If we found some, do the transformation! + if (!UncondBranchPreds.empty() && DupRet) { + while (!UncondBranchPreds.empty()) { + BasicBlock *Pred = UncondBranchPreds.pop_back_val(); + DEBUG(dbgs() << "FOLDING: " << *BB + << "INTO UNCOND BRANCH PRED: " << *Pred); + (void)FoldReturnIntoUncondBranch(RI, BB, Pred); + } + + // If we eliminated all predecessors of the block, delete the block now. + if (pred_begin(BB) == pred_end(BB)) + // We know there are no successors, so just nuke the block. + BB->eraseFromParent(); + + return true; + } + + // Check out all of the conditional branches going to this return + // instruction. If any of them just select between returns, change the + // branch itself into a select/return pair. + while (!CondBranchPreds.empty()) { + BranchInst *BI = CondBranchPreds.pop_back_val(); + + // Check to see if the non-BB successor is also a return block. + if (isa<ReturnInst>(BI->getSuccessor(0)->getTerminator()) && + isa<ReturnInst>(BI->getSuccessor(1)->getTerminator()) && + SimplifyCondBranchToTwoReturns(BI, Builder)) + return true; + } + return false; +} + +bool SimplifyCFGOpt::SimplifyUnwind(UnwindInst *UI, IRBuilder<> &Builder) { + // Check to see if the first instruction in this block is just an unwind. + // If so, replace any invoke instructions which use this as an exception + // destination with call instructions. + BasicBlock *BB = UI->getParent(); + if (!BB->getFirstNonPHIOrDbg()->isTerminator()) return false; + + bool Changed = false; + SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB)); + while (!Preds.empty()) { + BasicBlock *Pred = Preds.back(); + InvokeInst *II = dyn_cast<InvokeInst>(Pred->getTerminator()); + if (II && II->getUnwindDest() == BB) { + // Insert a new branch instruction before the invoke, because this + // is now a fall through. + Builder.SetInsertPoint(II); + BranchInst *BI = Builder.CreateBr(II->getNormalDest()); + Pred->getInstList().remove(II); // Take out of symbol table + + // Insert the call now. + SmallVector<Value*,8> Args(II->op_begin(), II->op_end()-3); + Builder.SetInsertPoint(BI); + CallInst *CI = Builder.CreateCall(II->getCalledValue(), + Args, II->getName()); + CI->setCallingConv(II->getCallingConv()); + CI->setAttributes(II->getAttributes()); + // If the invoke produced a value, the Call now does instead. + II->replaceAllUsesWith(CI); + delete II; + Changed = true; + } + + Preds.pop_back(); + } + + // If this block is now dead (and isn't the entry block), remove it. + if (pred_begin(BB) == pred_end(BB) && + BB != &BB->getParent()->getEntryBlock()) { + // We know there are no successors, so just nuke the block. + BB->eraseFromParent(); + return true; + } + + return Changed; +} + +bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { + BasicBlock *BB = UI->getParent(); + + bool Changed = false; + + // If there are any instructions immediately before the unreachable that can + // be removed, do so. 
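+  // Walk backwards from the unreachable, deleting dead instructions until an
+  // instruction with observable behavior (a call, or a volatile load, store,
+  // or atomic operation) stops the pruning.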
+  while (UI != BB->begin()) {
+    BasicBlock::iterator BBI = UI;
+    --BBI;
+    // Do not delete instructions that can have side effects which might cause
+    // the unreachable to not be reachable; specifically, calls and volatile
+    // operations may have this effect.
+    if (isa<CallInst>(BBI) && !isa<DbgInfoIntrinsic>(BBI)) break;
+
+    if (BBI->mayHaveSideEffects()) {
+      if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
+        if (SI->isVolatile())
+          break;
+      } else if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
+        if (LI->isVolatile())
+          break;
+      } else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(BBI)) {
+        if (RMWI->isVolatile())
+          break;
+      } else if (AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(BBI)) {
+        if (CXI->isVolatile())
+          break;
+      } else if (!isa<FenceInst>(BBI) && !isa<VAArgInst>(BBI) &&
+                 !isa<LandingPadInst>(BBI)) {
+        break;
+      }
+      // Note that deleting LandingPads here is in fact okay, although it
+      // involves a bit of subtle reasoning. If this inst is a LandingPad,
+      // all the predecessors of this block will be the unwind edges of Invokes,
+      // and we can therefore guarantee this block will be erased.
+    }
+
+    // Delete this instruction (any uses are guaranteed to be dead)
+    if (!BBI->use_empty())
+      BBI->replaceAllUsesWith(UndefValue::get(BBI->getType()));
+    BBI->eraseFromParent();
+    Changed = true;
+  }
+
+  // If the unreachable instruction is the first in the block, take a gander
+  // at all of the predecessors of this instruction, and simplify them.
+  if (&BB->front() != UI) return Changed;
+
+  SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB));
+  for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
+    TerminatorInst *TI = Preds[i]->getTerminator();
+    IRBuilder<> Builder(TI);
+    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+      if (BI->isUnconditional()) {
+        if (BI->getSuccessor(0) == BB) {
+          new UnreachableInst(TI->getContext(), TI);
+          TI->eraseFromParent();
+          Changed = true;
+        }
+      } else {
+        if (BI->getSuccessor(0) == BB) {
+          Builder.CreateBr(BI->getSuccessor(1));
+          EraseTerminatorInstAndDCECond(BI);
+          Changed = true;
+        } else if (BI->getSuccessor(1) == BB) {
+          Builder.CreateBr(BI->getSuccessor(0));
+          EraseTerminatorInstAndDCECond(BI);
+          Changed = true;
+        }
+      }
+    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+      for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i)
+        if (SI->getSuccessor(i) == BB) {
+          BB->removePredecessor(SI->getParent());
+          SI->removeCase(i);
+          --i; --e;
+          Changed = true;
+        }
+      // If the default value is unreachable, figure out the most popular
+      // destination and make it the default.
+      if (SI->getSuccessor(0) == BB) {
+        std::map<BasicBlock*, std::pair<unsigned, unsigned> > Popularity;
+        for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) {
+          std::pair<unsigned, unsigned>& entry =
+              Popularity[SI->getSuccessor(i)];
+          if (entry.first == 0) {
+            entry.first = 1;
+            entry.second = i;
+          } else {
+            entry.first++;
+          }
+        }
+
+        // Find the most popular block.
+        unsigned MaxPop = 0;
+        unsigned MaxIndex = 0;
+        BasicBlock *MaxBlock = 0;
+        for (std::map<BasicBlock*, std::pair<unsigned, unsigned> >::iterator
+             I = Popularity.begin(), E = Popularity.end(); I != E; ++I) {
+          if (I->second.first > MaxPop ||
+              (I->second.first == MaxPop && MaxIndex > I->second.second)) {
+            MaxPop = I->second.first;
+            MaxIndex = I->second.second;
+            MaxBlock = I->first;
+          }
+        }
+        if (MaxBlock) {
+          // Make this the new default, allowing us to delete any explicit
+          // edges to it.
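+          // Retargeting the default edge can leave MaxBlock with redundant
+          // PHI entries, so MaxPop-1 incoming values are pruned below before
+          // the now-duplicate explicit cases are erased.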
+          SI->setSuccessor(0, MaxBlock);
+          Changed = true;
+
+          // If MaxBlock has phinodes in it, remove MaxPop-1 entries from
+          // it.
+          if (isa<PHINode>(MaxBlock->begin()))
+            for (unsigned i = 0; i != MaxPop-1; ++i)
+              MaxBlock->removePredecessor(SI->getParent());
+
+          for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i)
+            if (SI->getSuccessor(i) == MaxBlock) {
+              SI->removeCase(i);
+              --i; --e;
+            }
+        }
+      }
+    } else if (InvokeInst *II = dyn_cast<InvokeInst>(TI)) {
+      if (II->getUnwindDest() == BB) {
+        // Convert the invoke to a call instruction.  This would be a good
+        // place to note that the call does not throw though.
+        BranchInst *BI = Builder.CreateBr(II->getNormalDest());
+        II->removeFromParent();   // Take out of symbol table
+
+        // Insert the call now...
+        SmallVector<Value*, 8> Args(II->op_begin(), II->op_end()-3);
+        Builder.SetInsertPoint(BI);
+        CallInst *CI = Builder.CreateCall(II->getCalledValue(),
+                                          Args, II->getName());
+        CI->setCallingConv(II->getCallingConv());
+        CI->setAttributes(II->getAttributes());
+        // If the invoke produced a value, the call does now instead.
+        II->replaceAllUsesWith(CI);
+        delete II;
+        Changed = true;
+      }
+    }
+  }
+
+  // If this block is now dead, remove it.
+  if (pred_begin(BB) == pred_end(BB) &&
+      BB != &BB->getParent()->getEntryBlock()) {
+    // We know there are no successors, so just nuke the block.
+    BB->eraseFromParent();
+    return true;
+  }
+
+  return Changed;
+}
+
+/// TurnSwitchRangeIntoICmp - Turns a switch that contains only an integer
+/// range comparison into a sub, an icmp and a branch.
+static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
+  assert(SI->getNumCases() > 2 && "Degenerate switch?");
+
+  // Make sure all cases point to the same destination and gather the values.
+  SmallVector<ConstantInt *, 16> Cases;
+  Cases.push_back(SI->getCaseValue(1));
+  for (unsigned I = 2, E = SI->getNumCases(); I != E; ++I) {
+    if (SI->getSuccessor(I-1) != SI->getSuccessor(I))
+      return false;
+    Cases.push_back(SI->getCaseValue(I));
+  }
+  assert(Cases.size() == SI->getNumCases()-1 && "Not all cases gathered");
+
+  // Sort the case values, then check if they form a range we can transform.
+  array_pod_sort(Cases.begin(), Cases.end(), ConstantIntSortPredicate);
+  for (unsigned I = 1, E = Cases.size(); I != E; ++I) {
+    if (Cases[I-1]->getValue() != Cases[I]->getValue()+1)
+      return false;
+  }
+
+  Constant *Offset = ConstantExpr::getNeg(Cases.back());
+  Constant *NumCases = ConstantInt::get(Offset->getType(), SI->getNumCases()-1);
+
+  Value *Sub = SI->getCondition();
+  if (!Offset->isNullValue())
+    Sub = Builder.CreateAdd(Sub, Offset, Sub->getName()+".off");
+  Value *Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
+  Builder.CreateCondBr(Cmp, SI->getSuccessor(1), SI->getDefaultDest());
+
+  // Prune obsolete incoming values off the successor's PHI nodes.
+  for (BasicBlock::iterator BBI = SI->getSuccessor(1)->begin();
+       isa<PHINode>(BBI); ++BBI) {
+    for (unsigned I = 0, E = SI->getNumCases()-2; I != E; ++I)
+      cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
+  }
+  SI->eraseFromParent();
+
+  return true;
+}
+
+/// EliminateDeadSwitchCases - Compute masked bits for the condition of a switch
+/// and use it to remove dead cases.
+static bool EliminateDeadSwitchCases(SwitchInst *SI) { + Value *Cond = SI->getCondition(); + unsigned Bits = cast<IntegerType>(Cond->getType())->getBitWidth(); + APInt KnownZero(Bits, 0), KnownOne(Bits, 0); + ComputeMaskedBits(Cond, APInt::getAllOnesValue(Bits), KnownZero, KnownOne); + + // Gather dead cases. + SmallVector<ConstantInt*, 8> DeadCases; + for (unsigned I = 1, E = SI->getNumCases(); I != E; ++I) { + if ((SI->getCaseValue(I)->getValue() & KnownZero) != 0 || + (SI->getCaseValue(I)->getValue() & KnownOne) != KnownOne) { + DeadCases.push_back(SI->getCaseValue(I)); + DEBUG(dbgs() << "SimplifyCFG: switch case '" + << SI->getCaseValue(I)->getValue() << "' is dead.\n"); + } + } + + // Remove dead cases from the switch. + for (unsigned I = 0, E = DeadCases.size(); I != E; ++I) { + unsigned Case = SI->findCaseValue(DeadCases[I]); + // Prune unused values from PHI nodes. + SI->getSuccessor(Case)->removePredecessor(SI->getParent()); + SI->removeCase(Case); + } + + return !DeadCases.empty(); +} + +/// FindPHIForConditionForwarding - If BB would be eligible for simplification +/// by TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated +/// by an unconditional branch), look at the phi node for BB in the successor +/// block and see if the incoming value is equal to CaseValue. If so, return +/// the phi node, and set PhiIndex to BB's index in the phi node. +static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue, + BasicBlock *BB, + int *PhiIndex) { + if (BB->getFirstNonPHIOrDbg() != BB->getTerminator()) + return NULL; // BB must be empty to be a candidate for simplification. + if (!BB->getSinglePredecessor()) + return NULL; // BB must be dominated by the switch. + + BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator()); + if (!Branch || !Branch->isUnconditional()) + return NULL; // Terminator must be unconditional branch. + + BasicBlock *Succ = Branch->getSuccessor(0); + + BasicBlock::iterator I = Succ->begin(); + while (PHINode *PHI = dyn_cast<PHINode>(I++)) { + int Idx = PHI->getBasicBlockIndex(BB); + assert(Idx >= 0 && "PHI has no entry for predecessor?"); + + Value *InValue = PHI->getIncomingValue(Idx); + if (InValue != CaseValue) continue; + + *PhiIndex = Idx; + return PHI; + } + + return NULL; +} + +/// ForwardSwitchConditionToPHI - Try to forward the condition of a switch +/// instruction to a phi node dominated by the switch, if that would mean that +/// some of the destination blocks of the switch can be folded away. +/// Returns true if a change is made. +static bool ForwardSwitchConditionToPHI(SwitchInst *SI) { + typedef DenseMap<PHINode*, SmallVector<int,4> > ForwardingNodesMap; + ForwardingNodesMap ForwardingNodes; + + for (unsigned I = 1; I < SI->getNumCases(); ++I) { // 0 is the default case. 
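+    // For each case, see whether its destination merely forwards CaseValue
+    // into a PHI of the destination's successor; matching PHI indexes are
+    // collected per-PHI so they can be rewritten together below.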
+ ConstantInt *CaseValue = SI->getCaseValue(I); + BasicBlock *CaseDest = SI->getSuccessor(I); + + int PhiIndex; + PHINode *PHI = FindPHIForConditionForwarding(CaseValue, CaseDest, + &PhiIndex); + if (!PHI) continue; + + ForwardingNodes[PHI].push_back(PhiIndex); + } + + bool Changed = false; + + for (ForwardingNodesMap::iterator I = ForwardingNodes.begin(), + E = ForwardingNodes.end(); I != E; ++I) { + PHINode *Phi = I->first; + SmallVector<int,4> &Indexes = I->second; + + if (Indexes.size() < 2) continue; + + for (size_t I = 0, E = Indexes.size(); I != E; ++I) + Phi->setIncomingValue(Indexes[I], SI->getCondition()); + Changed = true; + } + + return Changed; +} + +bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { + // If this switch is too complex to want to look at, ignore it. + if (!isValueEqualityComparison(SI)) + return false; + + BasicBlock *BB = SI->getParent(); + + // If we only have one predecessor, and if it is a branch on this value, + // see if that predecessor totally determines the outcome of this switch. + if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) + if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder)) + return SimplifyCFG(BB) | true; + + Value *Cond = SI->getCondition(); + if (SelectInst *Select = dyn_cast<SelectInst>(Cond)) + if (SimplifySwitchOnSelect(SI, Select)) + return SimplifyCFG(BB) | true; + + // If the block only contains the switch, see if we can fold the block + // away into any preds. + BasicBlock::iterator BBI = BB->begin(); + // Ignore dbg intrinsics. + while (isa<DbgInfoIntrinsic>(BBI)) + ++BBI; + if (SI == &*BBI) + if (FoldValueComparisonIntoPredecessors(SI, Builder)) + return SimplifyCFG(BB) | true; + + // Try to transform the switch into an icmp and a branch. + if (TurnSwitchRangeIntoICmp(SI, Builder)) + return SimplifyCFG(BB) | true; + + // Remove unreachable cases. + if (EliminateDeadSwitchCases(SI)) + return SimplifyCFG(BB) | true; + + if (ForwardSwitchConditionToPHI(SI)) + return SimplifyCFG(BB) | true; + + return false; +} + +bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { + BasicBlock *BB = IBI->getParent(); + bool Changed = false; + + // Eliminate redundant destinations. + SmallPtrSet<Value *, 8> Succs; + for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) { + BasicBlock *Dest = IBI->getDestination(i); + if (!Dest->hasAddressTaken() || !Succs.insert(Dest)) { + Dest->removePredecessor(BB); + IBI->removeDestination(i); + --i; --e; + Changed = true; + } + } + + if (IBI->getNumDestinations() == 0) { + // If the indirectbr has no successors, change it to unreachable. + new UnreachableInst(IBI->getContext(), IBI); + EraseTerminatorInstAndDCECond(IBI); + return true; + } + + if (IBI->getNumDestinations() == 1) { + // If the indirectbr has one successor, change it to a direct branch. + BranchInst::Create(IBI->getDestination(0), IBI); + EraseTerminatorInstAndDCECond(IBI); + return true; + } + + if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) { + if (SimplifyIndirectBrOnSelect(IBI, SI)) + return SimplifyCFG(BB) | true; + } + return Changed; +} + +bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ + BasicBlock *BB = BI->getParent(); + + // If the Terminator is the only non-phi instruction, simplify the block. 
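+  // An empty block (ignoring phis, debug intrinsics and lifetime markers)
+  // that only branches onward can usually be folded into its successor; the
+  // entry block is excluded because it cannot be removed.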
+ BasicBlock::iterator I = BB->getFirstNonPHIOrDbgOrLifetime(); + if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() && + TryToSimplifyUncondBranchFromEmptyBlock(BB)) + return true; + + // If the only instruction in the block is a seteq/setne comparison + // against a constant, try to simplify the block. + if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) + if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) { + for (++I; isa<DbgInfoIntrinsic>(I); ++I) + ; + if (I->isTerminator() + && TryToSimplifyUncondBranchWithICmpInIt(ICI, TD, Builder)) + return true; + } + + return false; +} + + +bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { + BasicBlock *BB = BI->getParent(); + + // Conditional branch + if (isValueEqualityComparison(BI)) { + // If we only have one predecessor, and if it is a branch on this value, + // see if that predecessor totally determines the outcome of this + // switch. + if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) + if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder)) + return SimplifyCFG(BB) | true; + + // This block must be empty, except for the setcond inst, if it exists. + // Ignore dbg intrinsics. + BasicBlock::iterator I = BB->begin(); + // Ignore dbg intrinsics. + while (isa<DbgInfoIntrinsic>(I)) + ++I; + if (&*I == BI) { + if (FoldValueComparisonIntoPredecessors(BI, Builder)) + return SimplifyCFG(BB) | true; + } else if (&*I == cast<Instruction>(BI->getCondition())){ + ++I; + // Ignore dbg intrinsics. + while (isa<DbgInfoIntrinsic>(I)) + ++I; + if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder)) + return SimplifyCFG(BB) | true; + } + } + + // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction. + if (SimplifyBranchOnICmpChain(BI, TD, Builder)) + return true; + + // We have a conditional branch to two blocks that are only reachable + // from BI. We know that the condbr dominates the two blocks, so see if + // there is any identical code in the "then" and "else" blocks. If so, we + // can hoist it up to the branching block. + if (BI->getSuccessor(0)->getSinglePredecessor() != 0) { + if (BI->getSuccessor(1)->getSinglePredecessor() != 0) { + if (HoistThenElseCodeToIf(BI)) + return SimplifyCFG(BB) | true; + } else { + // If Successor #1 has multiple preds, we may be able to conditionally + // execute Successor #0 if it branches to successor #1. + TerminatorInst *Succ0TI = BI->getSuccessor(0)->getTerminator(); + if (Succ0TI->getNumSuccessors() == 1 && + Succ0TI->getSuccessor(0) == BI->getSuccessor(1)) + if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0))) + return SimplifyCFG(BB) | true; + } + } else if (BI->getSuccessor(1)->getSinglePredecessor() != 0) { + // If Successor #0 has multiple preds, we may be able to conditionally + // execute Successor #1 if it branches to successor #0. + TerminatorInst *Succ1TI = BI->getSuccessor(1)->getTerminator(); + if (Succ1TI->getNumSuccessors() == 1 && + Succ1TI->getSuccessor(0) == BI->getSuccessor(0)) + if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1))) + return SimplifyCFG(BB) | true; + } + + // If this is a branch on a phi node in the current block, thread control + // through this block if any PHI node entries are constants. 
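+  // A constant PHI entry means the branch direction is already known along
+  // that edge, so FoldCondBranchOnPHI can thread such predecessors directly
+  // to the correct successor.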
+  if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition()))
+    if (PN->getParent() == BI->getParent())
+      if (FoldCondBranchOnPHI(BI, TD))
+        return SimplifyCFG(BB) | true;
+
+  // If this basic block is ONLY a setcc and a branch, and if a predecessor
+  // branches to us and one of our successors, fold the setcc into the
+  // predecessor and use logical operations to pick the right destination.
+  if (FoldBranchToCommonDest(BI))
+    return SimplifyCFG(BB) | true;
+
+  // Scan predecessor blocks for conditional branches.
+  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+    if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
+      if (PBI != BI && PBI->isConditional())
+        if (SimplifyCondBranchToCondBranch(PBI, BI))
+          return SimplifyCFG(BB) | true;
+
+  return false;
+}
+
+/// Check if passing a value to an instruction will cause undefined behavior.
+static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) {
+  Constant *C = dyn_cast<Constant>(V);
+  if (!C)
+    return false;
+
+  if (!I->hasOneUse()) // Only look at single-use instructions, for compile time
+    return false;
+
+  if (C->isNullValue()) {
+    Instruction *Use = I->use_back();
+
+    // Now make sure that there are no instructions in between that can alter
+    // control flow (e.g. calls)
+    for (BasicBlock::iterator i = ++BasicBlock::iterator(I); &*i != Use; ++i)
+      if (i == I->getParent()->end() || i->mayHaveSideEffects())
+        return false;
+
+    // Look through GEPs.  A load from a GEP derived from NULL is still undefined
+    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Use))
+      if (GEP->getPointerOperand() == I)
+        return passingValueIsAlwaysUndefined(V, GEP);
+
+    // Look through bitcasts.
+    if (BitCastInst *BC = dyn_cast<BitCastInst>(Use))
+      return passingValueIsAlwaysUndefined(V, BC);
+
+    // Load from null is undefined.
+    if (LoadInst *LI = dyn_cast<LoadInst>(Use))
+      return LI->getPointerAddressSpace() == 0;
+
+    // Store to null is undefined.
+    if (StoreInst *SI = dyn_cast<StoreInst>(Use))
+      return SI->getPointerAddressSpace() == 0 && SI->getPointerOperand() == I;
+  }
+  return false;
+}
+
+/// If BB has an incoming value that will always trigger undefined behavior
+/// (e.g. a null pointer dereference), remove the branch leading here.
+static bool removeUndefIntroducingPredecessor(BasicBlock *BB) {
+  for (BasicBlock::iterator i = BB->begin();
+       PHINode *PHI = dyn_cast<PHINode>(i); ++i)
+    for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
+      if (passingValueIsAlwaysUndefined(PHI->getIncomingValue(i), PHI)) {
+        TerminatorInst *T = PHI->getIncomingBlock(i)->getTerminator();
+        IRBuilder<> Builder(T);
+        if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
+          BB->removePredecessor(PHI->getIncomingBlock(i));
+          // Turn unconditional branches into unreachables and remove the dead
+          // destination from conditional branches.
+          if (BI->isUnconditional())
+            Builder.CreateUnreachable();
+          else
+            Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1) :
+                             BI->getSuccessor(0));
+          BI->eraseFromParent();
+          return true;
+        }
+        // TODO: SwitchInst.
+      }
+
+  return false;
+}
+
+bool SimplifyCFGOpt::run(BasicBlock *BB) {
+  bool Changed = false;
+
+  assert(BB && BB->getParent() && "Block not embedded in function!");
+  assert(BB->getTerminator() && "Degenerate basic block encountered!");
+
+  // Remove basic blocks that have no predecessors (except the entry block)...
+  // or that just have themselves as a predecessor.  These are unreachable.
+ if ((pred_begin(BB) == pred_end(BB) && + BB != &BB->getParent()->getEntryBlock()) || + BB->getSinglePredecessor() == BB) { + DEBUG(dbgs() << "Removing BB: \n" << *BB); + DeleteDeadBlock(BB); + return true; + } + + // Check to see if we can constant propagate this terminator instruction + // away... + Changed |= ConstantFoldTerminator(BB, true); + + // Check for and eliminate duplicate PHI nodes in this block. + Changed |= EliminateDuplicatePHINodes(BB); + + // Check for and remove branches that will always cause undefined behavior. + Changed |= removeUndefIntroducingPredecessor(BB); + + // Merge basic blocks into their predecessor if there is only one distinct + // pred, and if there is only one distinct successor of the predecessor, and + // if there are no PHI nodes. + // + if (MergeBlockIntoPredecessor(BB)) + return true; + + IRBuilder<> Builder(BB); + + // If there is a trivial two-entry PHI node in this basic block, and we can + // eliminate it, do so now. + if (PHINode *PN = dyn_cast<PHINode>(BB->begin())) + if (PN->getNumIncomingValues() == 2) + Changed |= FoldTwoEntryPHINode(PN, TD); + + Builder.SetInsertPoint(BB->getTerminator()); + if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) { + if (BI->isUnconditional()) { + if (SimplifyUncondBranch(BI, Builder)) return true; + } else { + if (SimplifyCondBranch(BI, Builder)) return true; + } + } else if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) { + if (SimplifyResume(RI, Builder)) return true; + } else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { + if (SimplifyReturn(RI, Builder)) return true; + } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) { + if (SimplifySwitch(SI, Builder)) return true; + } else if (UnreachableInst *UI = + dyn_cast<UnreachableInst>(BB->getTerminator())) { + if (SimplifyUnreachable(UI)) return true; + } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) { + if (SimplifyUnwind(UI, Builder)) return true; + } else if (IndirectBrInst *IBI = + dyn_cast<IndirectBrInst>(BB->getTerminator())) { + if (SimplifyIndirectBr(IBI)) return true; + } + + return Changed; +} + +/// SimplifyCFG - This function is used to do simplification of a CFG. For +/// example, it adjusts branches to branches to eliminate the extra hop, it +/// eliminates unreachable basic blocks, and does other "peephole" optimization +/// of the CFG. It returns true if a modification was made. +/// +bool llvm::SimplifyCFG(BasicBlock *BB, const TargetData *TD) { + return SimplifyCFGOpt(TD).run(BB); +} diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp new file mode 100644 index 0000000..76289c0 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -0,0 +1,432 @@ +//===-- SimplifyIndVar.cpp - Induction variable simplification ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements induction variable simplification. It does +// not define any actual pass or policy, but provides a single function to +// simplify a loop's induction variables based on ScalarEvolution. 
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "indvars"
+
+#include "llvm/Instructions.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/IVUsers.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/SimplifyIndVar.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+
+using namespace llvm;
+
+STATISTIC(NumElimIdentity, "Number of IV identities eliminated");
+STATISTIC(NumElimOperand,  "Number of IV operands folded into a use");
+STATISTIC(NumElimRem     , "Number of IV remainder operations eliminated");
+STATISTIC(NumElimCmp     , "Number of IV comparisons eliminated");
+
+namespace {
+  /// SimplifyIndvar - This is a utility for simplifying induction variables
+  /// based on ScalarEvolution. It is the primary instrument of the
+  /// IndvarSimplify pass, but it may also be directly invoked to clean up
+  /// after other loop passes that preserve SCEV.
+  class SimplifyIndvar {
+    Loop             *L;
+    LoopInfo         *LI;
+    DominatorTree    *DT;
+    ScalarEvolution  *SE;
+    IVUsers          *IU; // NULL for DisableIVRewrite
+    const TargetData *TD; // May be NULL
+
+    SmallVectorImpl<WeakVH> &DeadInsts;
+
+    bool Changed;
+
+  public:
+    SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, LPPassManager *LPM,
+                   SmallVectorImpl<WeakVH> &Dead, IVUsers *IVU = NULL) :
+      L(Loop),
+      LI(LPM->getAnalysisIfAvailable<LoopInfo>()),
+      SE(SE),
+      IU(IVU),
+      TD(LPM->getAnalysisIfAvailable<TargetData>()),
+      DeadInsts(Dead),
+      Changed(false) {
+      assert(LI && "IV simplification requires LoopInfo");
+    }
+
+    bool hasChanged() const { return Changed; }
+
+    /// Iteratively perform simplification on a worklist of users of the
+    /// specified induction variable. This is the top-level driver that applies
+    /// all simplifications to users of an IV.
+    void simplifyUsers(PHINode *CurrIV, IVVisitor *V = NULL);
+
+    Value *foldIVUser(Instruction *UseInst, Instruction *IVOperand);
+
+    bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
+    void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand);
+    void eliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand,
+                              bool IsSigned);
+  };
+}
+
+/// foldIVUser - Fold an IV operand into its use. This removes increments of an
+/// aligned IV when used by an instruction that ignores the low bits.
+///
+/// IVOperand is guaranteed SCEVable, but UseInst may not be.
+///
+/// Return the operand of IVOperand for this induction variable if IVOperand can
+/// be folded (in case more folding opportunities have been exposed).
+/// Otherwise return null.
+Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) {
+  Value *IVSrc = 0;
+  unsigned OperIdx = 0;
+  const SCEV *FoldedExpr = 0;
+  switch (UseInst->getOpcode()) {
+  default:
+    return 0;
+  case Instruction::UDiv:
+  case Instruction::LShr:
+    // We're only interested in the case where we know something about
+    // the numerator and have a constant denominator.
+    if (IVOperand != UseInst->getOperand(OperIdx) ||
+        !isa<ConstantInt>(UseInst->getOperand(1)))
+      return 0;
+
+    // Attempt to fold a binary operator with constant operand.
+    // e.g.
((I + 1) >> 2) => I >> 2
+    if (IVOperand->getNumOperands() != 2 ||
+        !isa<ConstantInt>(IVOperand->getOperand(1)))
+      return 0;
+
+    IVSrc = IVOperand->getOperand(0);
+    // IVSrc must be the (SCEVable) IV, since the other operand is const.
+    assert(SE->isSCEVable(IVSrc->getType()) && "Expect SCEVable IV operand");
+
+    ConstantInt *D = cast<ConstantInt>(UseInst->getOperand(1));
+    if (UseInst->getOpcode() == Instruction::LShr) {
+      // Get a constant for the divisor. See createSCEV.
+      uint32_t BitWidth = cast<IntegerType>(UseInst->getType())->getBitWidth();
+      if (D->getValue().uge(BitWidth))
+        return 0;
+
+      D = ConstantInt::get(UseInst->getContext(),
+                           APInt(BitWidth, 1).shl(D->getZExtValue()));
+    }
+    FoldedExpr = SE->getUDivExpr(SE->getSCEV(IVSrc), SE->getSCEV(D));
+  }
+  // We have something that might fold its operand. Compare SCEVs.
+  if (!SE->isSCEVable(UseInst->getType()))
+    return 0;
+
+  // Bypass the operand if SCEV can prove it has no effect.
+  if (SE->getSCEV(UseInst) != FoldedExpr)
+    return 0;
+
+  DEBUG(dbgs() << "INDVARS: Eliminated IV operand: " << *IVOperand
+        << " -> " << *UseInst << '\n');
+
+  UseInst->setOperand(OperIdx, IVSrc);
+  assert(SE->getSCEV(UseInst) == FoldedExpr && "bad SCEV with folded oper");
+
+  ++NumElimOperand;
+  Changed = true;
+  if (IVOperand->use_empty())
+    DeadInsts.push_back(IVOperand);
+  return IVSrc;
+}
+
+/// eliminateIVComparison - SimplifyIVUsers helper for eliminating useless
+/// comparisons against an induction variable.
+void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
+  unsigned IVOperIdx = 0;
+  ICmpInst::Predicate Pred = ICmp->getPredicate();
+  if (IVOperand != ICmp->getOperand(0)) {
+    // Swapped
+    assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand");
+    IVOperIdx = 1;
+    Pred = ICmpInst::getSwappedPredicate(Pred);
+  }
+
+  // Get the SCEVs for the ICmp operands.
+  const SCEV *S = SE->getSCEV(ICmp->getOperand(IVOperIdx));
+  const SCEV *X = SE->getSCEV(ICmp->getOperand(1 - IVOperIdx));
+
+  // Simplify unnecessary loops away.
+  const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent());
+  S = SE->getSCEVAtScope(S, ICmpLoop);
+  X = SE->getSCEVAtScope(X, ICmpLoop);
+
+  // If the condition is always true or always false, replace it with
+  // a constant value.
+  if (SE->isKnownPredicate(Pred, S, X))
+    ICmp->replaceAllUsesWith(ConstantInt::getTrue(ICmp->getContext()));
+  else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X))
+    ICmp->replaceAllUsesWith(ConstantInt::getFalse(ICmp->getContext()));
+  else
+    return;
+
+  DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
+  ++NumElimCmp;
+  Changed = true;
+  DeadInsts.push_back(ICmp);
+}
+
+/// eliminateIVRemainder - SimplifyIVUsers helper for eliminating useless
+/// remainder operations operating on an induction variable.
+void SimplifyIndvar::eliminateIVRemainder(BinaryOperator *Rem,
+                                          Value *IVOperand,
+                                          bool IsSigned) {
+  // We're only interested in the case where we know something about
+  // the numerator.
+  if (IVOperand != Rem->getOperand(0))
+    return;
+
+  // Get the SCEVs for the Rem operands.
+  const SCEV *S = SE->getSCEV(Rem->getOperand(0));
+  const SCEV *X = SE->getSCEV(Rem->getOperand(1));
+
+  // Simplify unnecessary loops away.
+  const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent());
+  S = SE->getSCEVAtScope(S, ICmpLoop);
+  X = SE->getSCEVAtScope(X, ICmpLoop);
+
+  // i % n --> i if i is in [0,n).
+  if ((!IsSigned || SE->isKnownNonNegative(S)) &&
+      SE->isKnownPredicate(IsSigned ?
ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, + S, X)) + Rem->replaceAllUsesWith(Rem->getOperand(0)); + else { + // (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n). + const SCEV *LessOne = + SE->getMinusSCEV(S, SE->getConstant(S->getType(), 1)); + if (IsSigned && !SE->isKnownNonNegative(LessOne)) + return; + + if (!SE->isKnownPredicate(IsSigned ? + ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, + LessOne, X)) + return; + + ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ, + Rem->getOperand(0), Rem->getOperand(1)); + SelectInst *Sel = + SelectInst::Create(ICmp, + ConstantInt::get(Rem->getType(), 0), + Rem->getOperand(0), "tmp", Rem); + Rem->replaceAllUsesWith(Sel); + } + + // Inform IVUsers about the new users. + if (IU) { + if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0))) + IU->AddUsersIfInteresting(I); + } + DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n'); + ++NumElimRem; + Changed = true; + DeadInsts.push_back(Rem); +} + +/// eliminateIVUser - Eliminate an operation that consumes a simple IV and has +/// no observable side-effect given the range of IV values. +/// IVOperand is guaranteed SCEVable, but UseInst may not be. +bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst, + Instruction *IVOperand) { + if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) { + eliminateIVComparison(ICmp, IVOperand); + return true; + } + if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) { + bool IsSigned = Rem->getOpcode() == Instruction::SRem; + if (IsSigned || Rem->getOpcode() == Instruction::URem) { + eliminateIVRemainder(Rem, IVOperand, IsSigned); + return true; + } + } + + // Eliminate any operation that SCEV can prove is an identity function. + if (!SE->isSCEVable(UseInst->getType()) || + (UseInst->getType() != IVOperand->getType()) || + (SE->getSCEV(UseInst) != SE->getSCEV(IVOperand))) + return false; + + DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n'); + + UseInst->replaceAllUsesWith(IVOperand); + ++NumElimIdentity; + Changed = true; + DeadInsts.push_back(UseInst); + return true; +} + +/// pushIVUsers - Add all uses of Def to the current IV's worklist. +/// +static void pushIVUsers( + Instruction *Def, + SmallPtrSet<Instruction*,16> &Simplified, + SmallVectorImpl< std::pair<Instruction*,Instruction*> > &SimpleIVUsers) { + + for (Value::use_iterator UI = Def->use_begin(), E = Def->use_end(); + UI != E; ++UI) { + Instruction *User = cast<Instruction>(*UI); + + // Avoid infinite or exponential worklist processing. + // Also ensure unique worklist users. + // If Def is a LoopPhi, it may not be in the Simplified set, so check for + // self edges first. + if (User != Def && Simplified.insert(User)) + SimpleIVUsers.push_back(std::make_pair(User, Def)); + } +} + +/// isSimpleIVUser - Return true if this instruction generates a simple SCEV +/// expression in terms of that IV. +/// +/// This is similar to IVUsers' isInteresting() but processes each instruction +/// non-recursively when the operand is already known to be a simpleIVUser. +/// +static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) { + if (!SE->isSCEVable(I->getType())) + return false; + + // Get the symbolic expression for this instruction. + const SCEV *S = SE->getSCEV(I); + + // Only consider affine recurrences. + const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S); + if (AR && AR->getLoop() == L) + return true; + + return false; +} + +/// simplifyUsers - Iteratively perform simplification on a worklist of users +/// of the specified induction variable. 
Each successive simplification may push
+/// more users which may themselves be candidates for simplification.
+///
+/// This algorithm does not require IVUsers analysis. Instead, it simplifies
+/// instructions in-place during analysis. Rather than rewriting induction
+/// variables bottom-up from their users, it transforms a chain of IVUsers
+/// top-down, updating the IR only when it encounters a clear optimization
+/// opportunity.
+///
+/// Once DisableIVRewrite is default, LSR will be the only client of IVUsers.
+///
+void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
+  if (!SE->isSCEVable(CurrIV->getType()))
+    return;
+
+  // Instructions processed by SimplifyIndvar for CurrIV.
+  SmallPtrSet<Instruction*,16> Simplified;
+
+  // Use-def pairs of IV users waiting to be processed for CurrIV.
+  SmallVector<std::pair<Instruction*, Instruction*>, 8> SimpleIVUsers;
+
+  // Push users of the current LoopPhi. In rare cases, pushIVUsers may be
+  // called multiple times for the same LoopPhi. This is the proper thing to
+  // do for loop header phis that use each other.
+  pushIVUsers(CurrIV, Simplified, SimpleIVUsers);
+
+  while (!SimpleIVUsers.empty()) {
+    std::pair<Instruction*, Instruction*> UseOper =
+      SimpleIVUsers.pop_back_val();
+    // Bypass back edges to avoid extra work.
+    if (UseOper.first == CurrIV) continue;
+
+    Instruction *IVOperand = UseOper.second;
+    for (unsigned N = 0; IVOperand; ++N) {
+      assert(N <= Simplified.size() && "runaway iteration");
+
+      Value *NewOper = foldIVUser(UseOper.first, IVOperand);
+      if (!NewOper)
+        break; // done folding
+      IVOperand = dyn_cast<Instruction>(NewOper);
+    }
+    if (!IVOperand)
+      continue;
+
+    if (eliminateIVUser(UseOper.first, IVOperand)) {
+      pushIVUsers(IVOperand, Simplified, SimpleIVUsers);
+      continue;
+    }
+    CastInst *Cast = dyn_cast<CastInst>(UseOper.first);
+    if (V && Cast) {
+      V->visitCast(Cast);
+      continue;
+    }
+    if (isSimpleIVUser(UseOper.first, L, SE)) {
+      pushIVUsers(UseOper.first, Simplified, SimpleIVUsers);
+    }
+  }
+}
+
+namespace llvm {
+
+/// simplifyUsersOfIV - Simplify instructions that use this induction variable
+/// by using ScalarEvolution to analyze the IV's recurrence.
+bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, LPPassManager *LPM,
+                       SmallVectorImpl<WeakVH> &Dead, IVVisitor *V)
+{
+  LoopInfo *LI = &LPM->getAnalysis<LoopInfo>();
+  SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, LPM, Dead);
+  SIV.simplifyUsers(CurrIV, V);
+  return SIV.hasChanged();
+}
+
+/// simplifyLoopIVs - Simplify users of induction variables within this
+/// loop. This does not actually change or add IVs.
+bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, LPPassManager *LPM,
+                     SmallVectorImpl<WeakVH> &Dead) {
+  bool Changed = false;
+  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
+    Changed |= simplifyUsersOfIV(cast<PHINode>(I), SE, LPM, Dead);
+  }
+  return Changed;
+}
+
+/// simplifyIVUsers - Perform simplification on instructions recorded by the
+/// IVUsers pass.
+///
+/// This is the old approach to IV simplification to be replaced by
+/// SimplifyLoopIVs.
+bool simplifyIVUsers(IVUsers *IU, ScalarEvolution *SE, LPPassManager *LPM,
+                     SmallVectorImpl<WeakVH> &Dead) {
+  SimplifyIndvar SIV(IU->getLoop(), SE, LPM, Dead);
+
+  // Each round of simplification involves a round of eliminating operations
+  // followed by a round of widening IVs. A single IVUsers worklist is used
+  // across all rounds. The inner loop advances the user.
If widening exposes + // more uses, then another pass through the outer loop is triggered. + for (IVUsers::iterator I = IU->begin(); I != IU->end(); ++I) { + Instruction *UseInst = I->getUser(); + Value *IVOperand = I->getOperandValToReplace(); + + if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) { + SIV.eliminateIVComparison(ICmp, IVOperand); + continue; + } + if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) { + bool IsSigned = Rem->getOpcode() == Instruction::SRem; + if (IsSigned || Rem->getOpcode() == Instruction::URem) { + SIV.eliminateIVRemainder(Rem, IVOperand, IsSigned); + continue; + } + } + } + return SIV.hasChanged(); +} + +} // namespace llvm diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp new file mode 100644 index 0000000..ac005f9 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp @@ -0,0 +1,94 @@ +//===------ SimplifyInstructions.cpp - Remove redundant instructions ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is a utility pass used for testing the InstructionSimplify analysis. +// The analysis is applied to every instruction, and if it simplifies then the +// instruction is replaced by the simplification. If you are looking for a pass +// that performs serious instruction folding, use the instcombine pass instead. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "instsimplify" +#include "llvm/Function.h" +#include "llvm/Pass.h" +#include "llvm/Type.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" +using namespace llvm; + +STATISTIC(NumSimplified, "Number of redundant instructions removed"); + +namespace { + struct InstSimplifier : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + InstSimplifier() : FunctionPass(ID) { + initializeInstSimplifierPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + } + + /// runOnFunction - Remove instructions that simplify. + bool runOnFunction(Function &F) { + const DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>(); + const TargetData *TD = getAnalysisIfAvailable<TargetData>(); + SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2; + bool Changed = false; + + do { + for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()), + DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) + for (BasicBlock::iterator BI = DI->begin(), BE = DI->end(); BI != BE;) { + Instruction *I = BI++; + // The first time through the loop ToSimplify is empty and we try to + // simplify all instructions. On later iterations ToSimplify is not + // empty and we only bother simplifying instructions that are in it. + if (!ToSimplify->empty() && !ToSimplify->count(I)) + continue; + // Don't waste time simplifying unused instructions. + if (!I->use_empty()) + if (Value *V = SimplifyInstruction(I, TD, DT)) { + // Mark all uses for resimplification next time round the loop. 
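+                // (Replacing I with V can make its users simplifiable in
+                // turn, which is why they are queued for the next round.)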
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); + UI != UE; ++UI) + Next->insert(cast<Instruction>(*UI)); + I->replaceAllUsesWith(V); + ++NumSimplified; + Changed = true; + } + Changed |= RecursivelyDeleteTriviallyDeadInstructions(I); + } + + // Place the list of instructions to simplify on the next loop iteration + // into ToSimplify. + std::swap(ToSimplify, Next); + Next->clear(); + } while (!ToSimplify->empty()); + + return Changed; + } + }; +} + +char InstSimplifier::ID = 0; +INITIALIZE_PASS(InstSimplifier, "instsimplify", "Remove redundant instructions", + false, false) +char &llvm::InstructionSimplifierID = InstSimplifier::ID; + +// Public interface to the simplify instructions pass. +FunctionPass *llvm::createInstructionSimplifierPass() { + return new InstSimplifier(); +} diff --git a/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp new file mode 100644 index 0000000..46d4ada --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp @@ -0,0 +1,142 @@ +//===- UnifyFunctionExitNodes.cpp - Make all functions have a single exit -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass is used to ensure that functions have at most one return +// instruction in them. Additionally, it keeps track of which node is the new +// exit node of the CFG. If there are no exit nodes in the CFG, the getExitNode +// method will return a null pointer. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/BasicBlock.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/Type.h" +#include "llvm/ADT/StringExtras.h" +using namespace llvm; + +char UnifyFunctionExitNodes::ID = 0; +INITIALIZE_PASS(UnifyFunctionExitNodes, "mergereturn", + "Unify function exit nodes", false, false) + +Pass *llvm::createUnifyFunctionExitNodesPass() { + return new UnifyFunctionExitNodes(); +} + +void UnifyFunctionExitNodes::getAnalysisUsage(AnalysisUsage &AU) const{ + // We preserve the non-critical-edgeness property + AU.addPreservedID(BreakCriticalEdgesID); + // This is a cluster of orthogonal Transforms + AU.addPreserved("mem2reg"); + AU.addPreservedID(LowerSwitchID); +} + +// UnifyAllExitNodes - Unify all exit nodes of the CFG by creating a new +// BasicBlock, and converting all returns to unconditional branches to this +// new basic block. The singular exit node is returned. +// +// If there are no return stmts in the Function, a null pointer is returned. +// +bool UnifyFunctionExitNodes::runOnFunction(Function &F) { + // Loop over all of the blocks in a function, tracking all of the blocks that + // return. + // + std::vector<BasicBlock*> ReturningBlocks; + std::vector<BasicBlock*> UnwindingBlocks; + std::vector<BasicBlock*> UnreachableBlocks; + for(Function::iterator I = F.begin(), E = F.end(); I != E; ++I) + if (isa<ReturnInst>(I->getTerminator())) + ReturningBlocks.push_back(I); + else if (isa<UnwindInst>(I->getTerminator())) + UnwindingBlocks.push_back(I); + else if (isa<UnreachableInst>(I->getTerminator())) + UnreachableBlocks.push_back(I); + + // Handle unwinding blocks first. 
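+  // Zero unwind blocks leaves UnwindBlock null, a single one is reused
+  // as-is, and multiple blocks are each redirected to a fresh
+  // "UnifiedUnwindBlock" that ends in a single unwind instruction.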
+  if (UnwindingBlocks.empty()) {
+    UnwindBlock = 0;
+  } else if (UnwindingBlocks.size() == 1) {
+    UnwindBlock = UnwindingBlocks.front();
+  } else {
+    UnwindBlock = BasicBlock::Create(F.getContext(), "UnifiedUnwindBlock", &F);
+    new UnwindInst(F.getContext(), UnwindBlock);
+
+    for (std::vector<BasicBlock*>::iterator I = UnwindingBlocks.begin(),
+           E = UnwindingBlocks.end(); I != E; ++I) {
+      BasicBlock *BB = *I;
+      BB->getInstList().pop_back();  // Remove the unwind insn
+      BranchInst::Create(UnwindBlock, BB);
+    }
+  }
+
+  // Then unreachable blocks.
+  if (UnreachableBlocks.empty()) {
+    UnreachableBlock = 0;
+  } else if (UnreachableBlocks.size() == 1) {
+    UnreachableBlock = UnreachableBlocks.front();
+  } else {
+    UnreachableBlock = BasicBlock::Create(F.getContext(),
+                                          "UnifiedUnreachableBlock", &F);
+    new UnreachableInst(F.getContext(), UnreachableBlock);
+
+    for (std::vector<BasicBlock*>::iterator I = UnreachableBlocks.begin(),
+           E = UnreachableBlocks.end(); I != E; ++I) {
+      BasicBlock *BB = *I;
+      BB->getInstList().pop_back();  // Remove the unreachable inst.
+      BranchInst::Create(UnreachableBlock, BB);
+    }
+  }
+
+  // Now handle return blocks.
+  if (ReturningBlocks.empty()) {
+    ReturnBlock = 0;
+    return false;                          // No blocks return
+  } else if (ReturningBlocks.size() == 1) {
+    ReturnBlock = ReturningBlocks.front(); // Already has a single return block
+    return false;
+  }
+
+  // Otherwise, we need to insert a new basic block into the function, add a
+  // PHI node (if the function returns values), and convert all of the return
+  // instructions into unconditional branches.
+  //
+  BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(),
+                                               "UnifiedReturnBlock", &F);
+
+  PHINode *PN = 0;
+  if (F.getReturnType()->isVoidTy()) {
+    ReturnInst::Create(F.getContext(), NULL, NewRetBlock);
+  } else {
+    // If the function doesn't return void... add a PHI node to the block...
+    PN = PHINode::Create(F.getReturnType(), ReturningBlocks.size(),
+                         "UnifiedRetVal");
+    NewRetBlock->getInstList().push_back(PN);
+    ReturnInst::Create(F.getContext(), PN, NewRetBlock);
+  }
+
+  // Loop over all of the blocks, replacing the return instruction with an
+  // unconditional branch.
+  //
+  for (std::vector<BasicBlock*>::iterator I = ReturningBlocks.begin(),
+         E = ReturningBlocks.end(); I != E; ++I) {
+    BasicBlock *BB = *I;
+
+    // Add an incoming element to the PHI node for every return instruction
+    // that is merging into this new block...
+    if (PN)
+      PN->addIncoming(BB->getTerminator()->getOperand(0), BB);
+
+    BB->getInstList().pop_back();  // Remove the return insn
+    BranchInst::Create(NewRetBlock, BB);
+  }
+  ReturnBlock = NewRetBlock;
+  return true;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/Utils.cpp b/contrib/llvm/lib/Transforms/Utils/Utils.cpp
new file mode 100644
index 0000000..24e8c8f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/Utils.cpp
@@ -0,0 +1,37 @@
+//===-- Utils.cpp - TransformUtils Infrastructure -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the common initialization infrastructure for the
+// TransformUtils library.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/InitializePasses.h" +#include "llvm-c/Initialization.h" + +using namespace llvm; + +/// initializeTransformUtils - Initialize all passes in the TransformUtils +/// library. +void llvm::initializeTransformUtils(PassRegistry &Registry) { + initializeBreakCriticalEdgesPass(Registry); + initializeInstNamerPass(Registry); + initializeLCSSAPass(Registry); + initializeLoopSimplifyPass(Registry); + initializeLowerInvokePass(Registry); + initializeLowerSwitchPass(Registry); + initializePromotePassPass(Registry); + initializeUnifyFunctionExitNodesPass(Registry); + initializeInstSimplifierPass(Registry); +} + +/// LLVMInitializeTransformUtils - C binding for initializeTransformUtilsPasses. +void LLVMInitializeTransformUtils(LLVMPassRegistryRef R) { + initializeTransformUtils(*unwrap(R)); +} diff --git a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp new file mode 100644 index 0000000..fc2538d --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp @@ -0,0 +1,200 @@ +//===- ValueMapper.cpp - Interface shared by lib/Transforms/Utils ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the MapValue function, which is shared by various parts of +// the lib/Transforms/Utils library. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/ValueMapper.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/InlineAsm.h" +#include "llvm/Instructions.h" +#include "llvm/Metadata.h" +using namespace llvm; + +// Out of line method to get vtable etc for class. +void ValueMapTypeRemapper::Anchor() {} + +Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, + ValueMapTypeRemapper *TypeMapper) { + ValueToValueMapTy::iterator I = VM.find(V); + + // If the value already exists in the map, use it. + if (I != VM.end() && I->second) return I->second; + + // Global values do not need to be seeded into the VM if they + // are using the identity mapping. + if (isa<GlobalValue>(V) || isa<MDString>(V)) + return VM[V] = const_cast<Value*>(V); + + if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) { + // Inline asm may need *type* remapping. + FunctionType *NewTy = IA->getFunctionType(); + if (TypeMapper) { + NewTy = cast<FunctionType>(TypeMapper->remapType(NewTy)); + + if (NewTy != IA->getFunctionType()) + V = InlineAsm::get(NewTy, IA->getAsmString(), IA->getConstraintString(), + IA->hasSideEffects(), IA->isAlignStack()); + } + + return VM[V] = const_cast<Value*>(V); + } + + + if (const MDNode *MD = dyn_cast<MDNode>(V)) { + // If this is a module-level metadata and we know that nothing at the module + // level is changing, then use an identity mapping. + if (!MD->isFunctionLocal() && (Flags & RF_NoModuleLevelChanges)) + return VM[V] = const_cast<Value*>(V); + + // Create a dummy node in case we have a metadata cycle. + MDNode *Dummy = MDNode::getTemporary(V->getContext(), ArrayRef<Value*>()); + VM[V] = Dummy; + + // Check all operands to see if any need to be remapped. 
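+    // The scan below stops at the first operand whose mapping differs; only
+    // then is a fully remapped copy of the node built, after which the
+    // cycle-breaking temporary node is replaced and deleted.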
+ for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) { + Value *OP = MD->getOperand(i); + if (OP == 0 || MapValue(OP, VM, Flags, TypeMapper) == OP) continue; + + // Ok, at least one operand needs remapping. + SmallVector<Value*, 4> Elts; + Elts.reserve(MD->getNumOperands()); + for (i = 0; i != e; ++i) { + Value *Op = MD->getOperand(i); + Elts.push_back(Op ? MapValue(Op, VM, Flags, TypeMapper) : 0); + } + MDNode *NewMD = MDNode::get(V->getContext(), Elts); + Dummy->replaceAllUsesWith(NewMD); + VM[V] = NewMD; + MDNode::deleteTemporary(Dummy); + return NewMD; + } + + VM[V] = const_cast<Value*>(V); + MDNode::deleteTemporary(Dummy); + + // No operands needed remapping. Use an identity mapping. + return const_cast<Value*>(V); + } + + // Okay, this either must be a constant (which may or may not be mappable) or + // is something that is not in the mapping table. + Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V)); + if (C == 0) + return 0; + + if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) { + Function *F = + cast<Function>(MapValue(BA->getFunction(), VM, Flags, TypeMapper)); + BasicBlock *BB = cast_or_null<BasicBlock>(MapValue(BA->getBasicBlock(), VM, + Flags, TypeMapper)); + return VM[V] = BlockAddress::get(F, BB ? BB : BA->getBasicBlock()); + } + + // Otherwise, we have some other constant to remap. Start by checking to see + // if all operands have an identity remapping. + unsigned OpNo = 0, NumOperands = C->getNumOperands(); + Value *Mapped = 0; + for (; OpNo != NumOperands; ++OpNo) { + Value *Op = C->getOperand(OpNo); + Mapped = MapValue(Op, VM, Flags, TypeMapper); + if (Mapped != C) break; + } + + // See if the type mapper wants to remap the type as well. + Type *NewTy = C->getType(); + if (TypeMapper) + NewTy = TypeMapper->remapType(NewTy); + + // If the result type and all operands match up, then just insert an identity + // mapping. + if (OpNo == NumOperands && NewTy == C->getType()) + return VM[V] = C; + + // Okay, we need to create a new constant. We've already processed some or + // all of the operands, set them all up now. + SmallVector<Constant*, 8> Ops; + Ops.reserve(NumOperands); + for (unsigned j = 0; j != OpNo; ++j) + Ops.push_back(cast<Constant>(C->getOperand(j))); + + // If one of the operands mismatch, push it and the other mapped operands. + if (OpNo != NumOperands) { + Ops.push_back(cast<Constant>(Mapped)); + + // Map the rest of the operands that aren't processed yet. + for (++OpNo; OpNo != NumOperands; ++OpNo) + Ops.push_back(MapValue(cast<Constant>(C->getOperand(OpNo)), VM, + Flags, TypeMapper)); + } + + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) + return VM[V] = CE->getWithOperands(Ops, NewTy); + if (isa<ConstantArray>(C)) + return VM[V] = ConstantArray::get(cast<ArrayType>(NewTy), Ops); + if (isa<ConstantStruct>(C)) + return VM[V] = ConstantStruct::get(cast<StructType>(NewTy), Ops); + if (isa<ConstantVector>(C)) + return VM[V] = ConstantVector::get(Ops); + // If this is a no-operand constant, it must be because the type was remapped. + if (isa<UndefValue>(C)) + return VM[V] = UndefValue::get(NewTy); + if (isa<ConstantAggregateZero>(C)) + return VM[V] = ConstantAggregateZero::get(NewTy); + assert(isa<ConstantPointerNull>(C)); + return VM[V] = ConstantPointerNull::get(cast<PointerType>(NewTy)); +} + +/// RemapInstruction - Convert the instruction operands from referencing the +/// current values into those specified by VMap. 
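+/// This is typically invoked by the cloning utilities once all values have
+/// been seeded into VMap: operands, phi incoming blocks and attached metadata
+/// are all remapped, and the instruction's type is remapped if a TypeMapper
+/// is given.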
+/// +void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap, + RemapFlags Flags, ValueMapTypeRemapper *TypeMapper){ + // Remap operands. + for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) { + Value *V = MapValue(*op, VMap, Flags, TypeMapper); + // If we aren't ignoring missing entries, assert that something happened. + if (V != 0) + *op = V; + else + assert((Flags & RF_IgnoreMissingEntries) && + "Referenced value not in value map!"); + } + + // Remap phi nodes' incoming blocks. + if (PHINode *PN = dyn_cast<PHINode>(I)) { + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *V = MapValue(PN->getIncomingBlock(i), VMap, Flags); + // If we aren't ignoring missing entries, assert that something happened. + if (V != 0) + PN->setIncomingBlock(i, cast<BasicBlock>(V)); + else + assert((Flags & RF_IgnoreMissingEntries) && + "Referenced block not in value map!"); + } + } + + // Remap attached metadata. + SmallVector<std::pair<unsigned, MDNode *>, 4> MDs; + I->getAllMetadata(MDs); + for (SmallVectorImpl<std::pair<unsigned, MDNode *> >::iterator + MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI) { + MDNode *Old = MI->second; + MDNode *New = MapValue(Old, VMap, Flags, TypeMapper); + if (New != Old) + I->setMetadata(MI->first, New); + } + + // If the instruction's type is being remapped, do so now. + if (TypeMapper) + I->mutateType(TypeMapper->remapType(I->getType())); +} |
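As a closing illustration of how these utilities are typically driven, here is a minimal sketch of a fixed-point loop over llvm::SimplifyCFG in the style of the -simplifycfg pass driver. It is not part of the imported sources: the helper name simplifyFunctionCFG is invented, and it assumes the LLVM 3.0-era API shown above, where llvm/Transforms/Utils/Local.h declares llvm::SimplifyCFG.

#include "llvm/Function.h"
#include "llvm/Transforms/Utils/Local.h" // declares llvm::SimplifyCFG

// Illustrative only: sweep repeatedly, because simplifying one block can
// expose simplifications in others, and SimplifyCFG may erase the block it
// is handed, so the iterator is advanced before each call.
static bool simplifyFunctionCFG(llvm::Function &F,
                                const llvm::TargetData *TD) {
  bool EverChanged = false;
  bool LocalChange;
  do {
    LocalChange = false;
    for (llvm::Function::iterator It = F.begin(); It != F.end(); ) {
      llvm::BasicBlock *BB = It++; // advance first; BB may be erased
      if (llvm::SimplifyCFG(BB, TD))
        LocalChange = true;
    }
    EverChanged |= LocalChange;
  } while (LocalChange);
  return EverChanged;
}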