Diffstat (limited to 'contrib/llvm/lib/Transforms/Scalar')
23 files changed, 1284 insertions, 918 deletions
diff --git a/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp b/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp index 9536939..0184390 100644 --- a/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -47,21 +47,21 @@ using namespace llvm; using namespace llvm::PatternMatch; STATISTIC(NumBlocksElim, "Number of blocks eliminated"); -STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated"); -STATISTIC(NumGEPsElim, "Number of GEPs converted to casts"); +STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated"); +STATISTIC(NumGEPsElim, "Number of GEPs converted to casts"); STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of " "sunken Cmps"); STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses " "of sunken Casts"); STATISTIC(NumMemoryInsts, "Number of memory instructions whose address " "computations were sunk"); -STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads"); -STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized"); +STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads"); +STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized"); +STATISTIC(NumRetsDup, "Number of return instructions duplicated"); -static cl::opt<bool> -CriticalEdgeSplit("cgp-critical-edge-splitting", - cl::desc("Split critical edges during codegen prepare"), - cl::init(false), cl::Hidden); +static cl::opt<bool> DisableBranchOpts( + "disable-cgp-branch-opts", cl::Hidden, cl::init(false), + cl::desc("Disable branch optimizations in CodeGenPrepare")); namespace { class CodeGenPrepare : public FunctionPass { @@ -76,15 +76,15 @@ namespace { /// update it. BasicBlock::iterator CurInstIterator; - /// BackEdges - Keep a set of all the loop back edges. - /// - SmallSet<std::pair<const BasicBlock*, const BasicBlock*>, 8> BackEdges; - - // Keeps track of non-local addresses that have been sunk into a block. This - // allows us to avoid inserting duplicate code for blocks with multiple - // load/stores of the same address. + /// Keeps track of non-local addresses that have been sunk into a block. + /// This allows us to avoid inserting duplicate code for blocks with + /// multiple load/stores of the same address. DenseMap<Value*, Value*> SunkAddrs; + /// ModifiedDT - If CFG is modified in anyway, dominator tree may need to + /// be updated. + bool ModifiedDT; + public: static char ID; // Pass identification, replacement for typeid explicit CodeGenPrepare(const TargetLowering *tli = 0) @@ -98,10 +98,6 @@ namespace { AU.addPreserved<ProfileInfo>(); } - virtual void releaseMemory() { - BackEdges.clear(); - } - private: bool EliminateMostlyEmptyBlocks(Function &F); bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const; @@ -113,7 +109,7 @@ namespace { bool OptimizeCallInst(CallInst *CI); bool MoveExtToFormExtLoad(Instruction *I); bool OptimizeExtUses(Instruction *I); - void findLoopBackEdges(const Function &F); + bool DupRetToEnableTailCallOpts(ReturnInst *RI); }; } @@ -125,40 +121,42 @@ FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) { return new CodeGenPrepare(TLI); } -/// findLoopBackEdges - Do a DFS walk to find loop back edges. 
-/// -void CodeGenPrepare::findLoopBackEdges(const Function &F) { - SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> Edges; - FindFunctionBackedges(F, Edges); - - BackEdges.insert(Edges.begin(), Edges.end()); -} - - bool CodeGenPrepare::runOnFunction(Function &F) { bool EverMadeChange = false; + ModifiedDT = false; DT = getAnalysisIfAvailable<DominatorTree>(); PFI = getAnalysisIfAvailable<ProfileInfo>(); + // First pass, eliminate blocks that contain only PHI nodes and an // unconditional branch. EverMadeChange |= EliminateMostlyEmptyBlocks(F); - // Now find loop back edges, but only if they are being used to decide which - // critical edges to split. - if (CriticalEdgeSplit) - findLoopBackEdges(F); - bool MadeChange = true; while (MadeChange) { MadeChange = false; - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + for (Function::iterator I = F.begin(), E = F.end(); I != E; ) { + BasicBlock *BB = I++; MadeChange |= OptimizeBlock(*BB); + } EverMadeChange |= MadeChange; } SunkAddrs.clear(); + if (!DisableBranchOpts) { + MadeChange = false; + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + MadeChange |= ConstantFoldTerminator(BB); + + if (MadeChange) + ModifiedDT = true; + EverMadeChange |= MadeChange; + } + + if (ModifiedDT && DT) + DT->DT->recalculate(F); + return EverMadeChange; } @@ -333,7 +331,7 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { // The PHIs are now updated, change everything that refers to BB to use // DestBB and remove BB. BB->replaceAllUsesWith(DestBB); - if (DT) { + if (DT && !ModifiedDT) { BasicBlock *BBIDom = DT->getNode(BB)->getIDom()->getBlock(); BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock(); BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom); @@ -350,110 +348,6 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); } -/// FindReusablePredBB - Check all of the predecessors of the block DestPHI -/// lives in to see if there is a block that we can reuse as a critical edge -/// from TIBB. -static BasicBlock *FindReusablePredBB(PHINode *DestPHI, BasicBlock *TIBB) { - BasicBlock *Dest = DestPHI->getParent(); - - /// TIPHIValues - This array is lazily computed to determine the values of - /// PHIs in Dest that TI would provide. - SmallVector<Value*, 32> TIPHIValues; - - /// TIBBEntryNo - This is a cache to speed up pred queries for TIBB. - unsigned TIBBEntryNo = 0; - - // Check to see if Dest has any blocks that can be used as a split edge for - // this terminator. - for (unsigned pi = 0, e = DestPHI->getNumIncomingValues(); pi != e; ++pi) { - BasicBlock *Pred = DestPHI->getIncomingBlock(pi); - // To be usable, the pred has to end with an uncond branch to the dest. - BranchInst *PredBr = dyn_cast<BranchInst>(Pred->getTerminator()); - if (!PredBr || !PredBr->isUnconditional()) - continue; - // Must be empty other than the branch and debug info. - BasicBlock::iterator I = Pred->begin(); - while (isa<DbgInfoIntrinsic>(I)) - I++; - if (&*I != PredBr) - continue; - // Cannot be the entry block; its label does not get emitted. - if (Pred == &Dest->getParent()->getEntryBlock()) - continue; - - // Finally, since we know that Dest has phi nodes in it, we have to make - // sure that jumping to Pred will have the same effect as going to Dest in - // terms of PHI values. 
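The new CodeGenPrepare::runOnFunction loop above advances the block iterator before calling OptimizeBlock ("BasicBlock *BB = I++;"), because the optimization may erase or split the block it is visiting. A minimal standalone sketch of the same idiom, with std::list standing in for the function's block list (plain C++, not LLVM code):

#include <cstdio>
#include <list>

// Process each element; processing may erase the current element, so the loop
// copies the iterator and advances it first -- the same shape as
// "BasicBlock *BB = I++;" in the hunk above.
static void processAll(std::list<int> &items) {
  for (std::list<int>::iterator it = items.begin(), end = items.end(); it != end; ) {
    std::list<int>::iterator cur = it++;   // advance before possibly erasing *cur
    if (*cur % 2 == 0)
      items.erase(cur);                    // safe: 'it' already points past it
  }
}

int main() {
  std::list<int> xs = {1, 2, 3, 4, 5};
  processAll(xs);
  for (int x : xs) std::printf("%d ", x);  // prints: 1 3 5
  std::printf("\n");
}

The same copy-then-advance shape recurs later in the patch wherever an iteration may delete the element it is currently looking at.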
- PHINode *PN; - unsigned PHINo = 0; - unsigned PredEntryNo = pi; - - bool FoundMatch = true; - for (BasicBlock::iterator I = Dest->begin(); - (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo) { - if (PHINo == TIPHIValues.size()) { - if (PN->getIncomingBlock(TIBBEntryNo) != TIBB) - TIBBEntryNo = PN->getBasicBlockIndex(TIBB); - TIPHIValues.push_back(PN->getIncomingValue(TIBBEntryNo)); - } - - // If the PHI entry doesn't work, we can't use this pred. - if (PN->getIncomingBlock(PredEntryNo) != Pred) - PredEntryNo = PN->getBasicBlockIndex(Pred); - - if (TIPHIValues[PHINo] != PN->getIncomingValue(PredEntryNo)) { - FoundMatch = false; - break; - } - } - - // If we found a workable predecessor, change TI to branch to Succ. - if (FoundMatch) - return Pred; - } - return 0; -} - - -/// SplitEdgeNicely - Split the critical edge from TI to its specified -/// successor if it will improve codegen. We only do this if the successor has -/// phi nodes (otherwise critical edges are ok). If there is already another -/// predecessor of the succ that is empty (and thus has no phi nodes), use it -/// instead of introducing a new block. -static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum, - SmallSet<std::pair<const BasicBlock*, - const BasicBlock*>, 8> &BackEdges, - Pass *P) { - BasicBlock *TIBB = TI->getParent(); - BasicBlock *Dest = TI->getSuccessor(SuccNum); - assert(isa<PHINode>(Dest->begin()) && - "This should only be called if Dest has a PHI!"); - PHINode *DestPHI = cast<PHINode>(Dest->begin()); - - // Do not split edges to EH landing pads. - if (InvokeInst *Invoke = dyn_cast<InvokeInst>(TI)) - if (Invoke->getSuccessor(1) == Dest) - return; - - // As a hack, never split backedges of loops. Even though the copy for any - // PHIs inserted on the backedge would be dead for exits from the loop, we - // assume that the cost of *splitting* the backedge would be too high. - if (BackEdges.count(std::make_pair(TIBB, Dest))) - return; - - if (BasicBlock *ReuseBB = FindReusablePredBB(DestPHI, TIBB)) { - ProfileInfo *PFI = P->getAnalysisIfAvailable<ProfileInfo>(); - if (PFI) - PFI->splitEdge(TIBB, Dest, ReuseBB); - Dest->removePredecessor(TIBB); - TI->setSuccessor(SuccNum, ReuseBB); - return; - } - - SplitCriticalEdge(TI, SuccNum, P, true); -} - - /// OptimizeNoopCopyExpression - If the specified cast instruction is a noop /// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC), /// sink it into user blocks to reduce the number of virtual @@ -640,7 +534,8 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) { // happens. WeakVH IterHandle(CurInstIterator); - ReplaceAndSimplifyAllUses(CI, RetVal, TLI ? TLI->getTargetData() : 0, DT); + ReplaceAndSimplifyAllUses(CI, RetVal, TLI ? TLI->getTargetData() : 0, + ModifiedDT ? 0 : DT); // If the iterator instruction was recursively deleted, start over at the // start of the block. @@ -666,6 +561,129 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) { return Simplifier.fold(CI, TD); } +/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return +/// instructions to the predecessor to enable tail call optimizations. 
The +/// case it is currently looking for is: +/// bb0: +/// %tmp0 = tail call i32 @f0() +/// br label %return +/// bb1: +/// %tmp1 = tail call i32 @f1() +/// br label %return +/// bb2: +/// %tmp2 = tail call i32 @f2() +/// br label %return +/// return: +/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ] +/// ret i32 %retval +/// +/// => +/// +/// bb0: +/// %tmp0 = tail call i32 @f0() +/// ret i32 %tmp0 +/// bb1: +/// %tmp1 = tail call i32 @f1() +/// ret i32 %tmp1 +/// bb2: +/// %tmp2 = tail call i32 @f2() +/// ret i32 %tmp2 +/// +bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) { + if (!TLI) + return false; + + Value *V = RI->getReturnValue(); + PHINode *PN = V ? dyn_cast<PHINode>(V) : NULL; + if (V && !PN) + return false; + + BasicBlock *BB = RI->getParent(); + if (PN && PN->getParent() != BB) + return false; + + // It's not safe to eliminate the sign / zero extension of the return value. + // See llvm::isInTailCallPosition(). + const Function *F = BB->getParent(); + unsigned CallerRetAttr = F->getAttributes().getRetAttributes(); + if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt)) + return false; + + // Make sure there are no instructions between the PHI and return, or that the + // return is the first instruction in the block. + if (PN) { + BasicBlock::iterator BI = BB->begin(); + do { ++BI; } while (isa<DbgInfoIntrinsic>(BI)); + if (&*BI != RI) + return false; + } else { + BasicBlock::iterator BI = BB->begin(); + while (isa<DbgInfoIntrinsic>(BI)) ++BI; + if (&*BI != RI) + return false; + } + + /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail + /// call. + SmallVector<CallInst*, 4> TailCalls; + if (PN) { + for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) { + CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I)); + // Make sure the phi value is indeed produced by the tail call. + if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) && + TLI->mayBeEmittedAsTailCall(CI)) + TailCalls.push_back(CI); + } + } else { + SmallPtrSet<BasicBlock*, 4> VisitedBBs; + for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { + if (!VisitedBBs.insert(*PI)) + continue; + + BasicBlock::InstListType &InstList = (*PI)->getInstList(); + BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin(); + BasicBlock::InstListType::reverse_iterator RE = InstList.rend(); + do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI)); + if (RI == RE) + continue; + + CallInst *CI = dyn_cast<CallInst>(&*RI); + if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI)) + TailCalls.push_back(CI); + } + } + + bool Changed = false; + for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) { + CallInst *CI = TailCalls[i]; + CallSite CS(CI); + + // Conservatively require the attributes of the call to match those of the + // return. Ignore noalias because it doesn't affect the call sequence. + unsigned CalleeRetAttr = CS.getAttributes().getRetAttributes(); + if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias) + continue; + + // Make sure the call instruction is followed by an unconditional branch to + // the return block. + BasicBlock *CallBB = CI->getParent(); + BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator()); + if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB) + continue; + + // Duplicate the return into CallBB. 
+ (void)FoldReturnIntoUncondBranch(RI, BB, CallBB); + ModifiedDT = Changed = true; + ++NumRetsDup; + } + + // If we eliminated all predecessors of the block, delete the block now. + if (Changed && pred_begin(BB) == pred_end(BB)) + BB->eraseFromParent(); + + return Changed; +} + //===----------------------------------------------------------------------===// // Memory Optimization //===----------------------------------------------------------------------===// @@ -701,7 +719,8 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // the addressing mode obtained from the non-PHI roots of the graph // are equivalent. Value *Consensus = 0; - unsigned NumUses = 0; + unsigned NumUsesConsensus = 0; + bool IsNumUsesConsensusValid = false; SmallVector<Instruction*, 16> AddrModeInsts; ExtAddrMode AddrMode; while (!worklist.empty()) { @@ -728,16 +747,31 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(V, AccessTy,MemoryInst, NewAddrModeInsts, *TLI); - - // Ensure that the obtained addressing mode is equivalent to that obtained - // for all other roots of the PHI traversal. Also, when choosing one - // such root as representative, select the one with the most uses in order - // to keep the cost modeling heuristics in AddressingModeMatcher applicable. - if (!Consensus || NewAddrMode == AddrMode) { - if (V->getNumUses() > NumUses) { + + // This check is broken into two cases with very similar code to avoid using + // getNumUses() as much as possible. Some values have a lot of uses, so + // calling getNumUses() unconditionally caused a significant compile-time + // regression. + if (!Consensus) { + Consensus = V; + AddrMode = NewAddrMode; + AddrModeInsts = NewAddrModeInsts; + continue; + } else if (NewAddrMode == AddrMode) { + if (!IsNumUsesConsensusValid) { + NumUsesConsensus = Consensus->getNumUses(); + IsNumUsesConsensusValid = true; + } + + // Ensure that the obtained addressing mode is equivalent to that obtained + // for all other roots of the PHI traversal. Also, when choosing one + // such root as representative, select the one with the most uses in order + // to keep the cost modeling heuristics in AddressingModeMatcher + // applicable. + unsigned NumUses = V->getNumUses(); + if (NumUses > NumUsesConsensus) { Consensus = V; - NumUses = V->getNumUses(); - AddrMode = NewAddrMode; + NumUsesConsensus = NumUses; AddrModeInsts = NewAddrModeInsts; } continue; @@ -855,11 +889,26 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, MemoryInst->replaceUsesOfWith(Repl, SunkAddr); + // If we have no uses, recursively delete the value and all dead instructions + // using it. if (Repl->use_empty()) { + // This can cause recursive deletion, which can invalidate our iterator. + // Use a WeakVH to hold onto it in case this happens. + WeakVH IterHandle(CurInstIterator); + BasicBlock *BB = CurInstIterator->getParent(); + RecursivelyDeleteTriviallyDeadInstructions(Repl); - // This address is now available for reassignment, so erase the table entry; - // we don't want to match some completely different instruction. - SunkAddrs[Addr] = 0; + + if (IterHandle != CurInstIterator) { + // If the iterator instruction was recursively deleted, start over at the + // start of the block. 
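DupRetToEnableTailCallOpts above requires the call's return attributes to match the caller's, ignoring only noalias, via "(CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias". A self-contained illustration of that bit test; the attribute bit values below are made up for the example and are not LLVM's real encodings:

#include <cassert>

// Hypothetical attribute bits, for illustration only.
enum : unsigned { ZExt = 1u << 0, SExt = 1u << 1, NoAlias = 1u << 2 };

// XOR keeps exactly the bits on which the two sets disagree; clearing the
// NoAlias bit from that difference means a noalias-only mismatch is tolerated.
static bool retAttrsCompatible(unsigned calleeRet, unsigned callerRet) {
  return ((calleeRet ^ callerRet) & ~NoAlias) == 0;
}

int main() {
  assert(retAttrsCompatible(ZExt, ZExt));            // identical attribute sets
  assert(retAttrsCompatible(ZExt | NoAlias, ZExt));  // differ only in noalias
  assert(!retAttrsCompatible(SExt, ZExt));           // sign/zero-extension mismatch
}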
+ CurInstIterator = BB->begin(); + SunkAddrs.clear(); + } else { + // This address is now available for reassignment, so erase the table + // entry; we don't want to match some completely different instruction. + SunkAddrs[Addr] = 0; + } } ++NumMemoryInsts; return true; @@ -1073,6 +1122,9 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) { if (CallInst *CI = dyn_cast<CallInst>(I)) return OptimizeCallInst(CI); + if (ReturnInst *RI = dyn_cast<ReturnInst>(I)) + return DupRetToEnableTailCallOpts(RI); + return false; } @@ -1080,21 +1132,8 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) { // across basic blocks and rewrite them to improve basic-block-at-a-time // selection. bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) { - bool MadeChange = false; - - // Split all critical edges where the dest block has a PHI. - if (CriticalEdgeSplit) { - TerminatorInst *BBTI = BB.getTerminator(); - if (BBTI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(BBTI)) { - for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) { - BasicBlock *SuccBB = BBTI->getSuccessor(i); - if (isa<PHINode>(SuccBB->begin()) && isCriticalEdge(BBTI, i, true)) - SplitEdgeNicely(BBTI, i, BackEdges, this); - } - } - } - SunkAddrs.clear(); + bool MadeChange = false; CurInstIterator = BB.begin(); for (BasicBlock::iterator E = BB.end(); CurInstIterator != E; ) diff --git a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index be12973..e275268 100644 --- a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -13,6 +13,7 @@ #define DEBUG_TYPE "correlated-value-propagation" #include "llvm/Transforms/Scalar.h" +#include "llvm/Constants.h" #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/Pass.h" diff --git a/contrib/llvm/lib/Transforms/Scalar/DCE.cpp b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp index dbb68f3..8dbcc23 100644 --- a/contrib/llvm/lib/Transforms/Scalar/DCE.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp @@ -23,7 +23,6 @@ #include "llvm/Pass.h" #include "llvm/Support/InstIterator.h" #include "llvm/ADT/Statistic.h" -#include <set> using namespace llvm; STATISTIC(DIEEliminated, "Number of insts removed by DIE pass"); diff --git a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index 867a06a..53e4640 100644 --- a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -340,24 +340,35 @@ static bool isCompleteOverwrite(const AliasAnalysis::Location &Later, // Okay, we have stores to two completely different pointers. Try to // decompose the pointer into a "base + constant_offset" form. If the base // pointers are equal, then we can reason about the two stores. - int64_t Off1 = 0, Off2 = 0; - const Value *BP1 = GetPointerBaseWithConstantOffset(P1, Off1, TD); - const Value *BP2 = GetPointerBaseWithConstantOffset(P2, Off2, TD); + int64_t EarlierOff = 0, LaterOff = 0; + const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, TD); + const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, TD); // If the base pointers still differ, we have two completely different stores. 
if (BP1 != BP2) return false; - - // Otherwise, we might have a situation like: - // store i16 -> P + 1 Byte - // store i32 -> P - // In this case, we see if the later store completely overlaps all bytes - // stored by the previous store. - if (Off1 < Off2 || // Earlier starts before Later. - Off1+Earlier.Size > Off2+Later.Size) // Earlier goes beyond Later. - return false; - // Otherwise, we have complete overlap. - return true; + + // The later store completely overlaps the earlier store if: + // + // 1. Both start at the same offset and the later one's size is greater than + // or equal to the earlier one's, or + // + // |--earlier--| + // |-- later --| + // + // 2. The earlier store has an offset greater than the later offset, but which + // still lies completely within the later store. + // + // |--earlier--| + // |----- later ------| + // + // We have to be careful here as *Off is signed while *.Size is unsigned. + if (EarlierOff >= LaterOff && + uint64_t(EarlierOff - LaterOff) + Earlier.Size <= Later.Size) + return true; + + // Otherwise, they don't completely overlap. + return false; } /// isPossibleSelfRead - If 'Inst' might be a self read (i.e. a noop copy of a @@ -474,7 +485,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { // away the store and we bail out. However, if we depend on on something // that overwrites the memory location we *can* potentially optimize it. // - // Find out what memory location the dependant instruction stores. + // Find out what memory location the dependent instruction stores. Instruction *DepWrite = InstDep.getInst(); AliasAnalysis::Location DepLoc = getLocForWrite(DepWrite, *AA); // If we didn't get a useful location, or if it isn't a size, bail out. @@ -631,28 +642,15 @@ bool DSE::handleEndBlock(BasicBlock &BB) { if (AA->doesNotAccessMemory(CS)) continue; - unsigned NumModRef = 0, NumOther = 0; - // If the call might load from any of our allocas, then any store above // the call is live. SmallVector<Value*, 8> LiveAllocas; for (SmallPtrSet<Value*, 16>::iterator I = DeadStackObjects.begin(), E = DeadStackObjects.end(); I != E; ++I) { - // If we detect that our AA is imprecise, it's not worth it to scan the - // rest of the DeadPointers set. Just assume that the AA will return - // ModRef for everything, and go ahead and bail out. - if (NumModRef >= 16 && NumOther == 0) - return MadeChange; - // See if the call site touches it. AliasAnalysis::ModRefResult A = AA->getModRefInfo(CS, *I, getPointerSize(*I, *AA)); - if (A == AliasAnalysis::ModRef) - ++NumModRef; - else - ++NumOther; - if (A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref) LiveAllocas.push_back(*I); } diff --git a/contrib/llvm/lib/Transforms/Scalar/GEPSplitter.cpp b/contrib/llvm/lib/Transforms/Scalar/GEPSplitter.cpp deleted file mode 100644 index 4c3d188..0000000 --- a/contrib/llvm/lib/Transforms/Scalar/GEPSplitter.cpp +++ /dev/null @@ -1,83 +0,0 @@ -//===- GEPSplitter.cpp - Split complex GEPs into simple ones --------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This function breaks GEPs with more than 2 non-zero operands into smaller -// GEPs each with no more than 2 non-zero operands. This exposes redundancy -// between GEPs with common initial operand sequences. 
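The rewritten overlap test in DeadStoreElimination's isCompleteOverwrite (earlier in this diff) is careful about signedness: the offsets returned by GetPointerBaseWithConstantOffset are signed while the access sizes are unsigned, so the subtraction is only done once EarlierOff >= LaterOff is established. A standalone recreation of just that arithmetic, with the two pictured cases as checks (plain C++, not the pass's code):

#include <cassert>
#include <cstdint>

// True when the later store [laterOff, laterOff+laterSize) fully covers the
// earlier store [earlierOff, earlierOff+earlierSize); offsets are signed and
// sizes unsigned, mirroring the check added in the DSE hunk.
static bool laterCoversEarlier(int64_t earlierOff, uint64_t earlierSize,
                               int64_t laterOff, uint64_t laterSize) {
  return earlierOff >= laterOff &&
         uint64_t(earlierOff - laterOff) + earlierSize <= laterSize;
}

int main() {
  assert(laterCoversEarlier(0, 4, 0, 4));    // same start, equal size
  assert(laterCoversEarlier(4, 2, 0, 8));    // earlier nested inside later
  assert(!laterCoversEarlier(-2, 4, 0, 8));  // earlier starts before later
  assert(!laterCoversEarlier(6, 4, 0, 8));   // earlier runs past the end of later
}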
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "split-geps" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/Pass.h" -using namespace llvm; - -namespace { - class GEPSplitter : public FunctionPass { - virtual bool runOnFunction(Function &F); - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - public: - static char ID; // Pass identification, replacement for typeid - explicit GEPSplitter() : FunctionPass(ID) { - initializeGEPSplitterPass(*PassRegistry::getPassRegistry()); - } - }; -} - -char GEPSplitter::ID = 0; -INITIALIZE_PASS(GEPSplitter, "split-geps", - "split complex GEPs into simple GEPs", false, false) - -FunctionPass *llvm::createGEPSplitterPass() { - return new GEPSplitter(); -} - -bool GEPSplitter::runOnFunction(Function &F) { - bool Changed = false; - - // Visit each GEP instruction. - for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) - for (BasicBlock::iterator II = I->begin(), IE = I->end(); II != IE; ) - if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(II++)) { - unsigned NumOps = GEP->getNumOperands(); - // Ignore GEPs which are already simple. - if (NumOps <= 2) - continue; - bool FirstIndexIsZero = isa<ConstantInt>(GEP->getOperand(1)) && - cast<ConstantInt>(GEP->getOperand(1))->isZero(); - if (NumOps == 3 && FirstIndexIsZero) - continue; - // The first index is special and gets expanded with a 2-operand GEP - // (unless it's zero, in which case we can skip this). - Value *NewGEP = FirstIndexIsZero ? - GEP->getOperand(0) : - GetElementPtrInst::Create(GEP->getOperand(0), GEP->getOperand(1), - "tmp", GEP); - // All remaining indices get expanded with a 3-operand GEP with zero - // as the second operand. 
- Value *Idxs[2]; - Idxs[0] = ConstantInt::get(Type::getInt64Ty(F.getContext()), 0); - for (unsigned i = 2; i != NumOps; ++i) { - Idxs[1] = GEP->getOperand(i); - NewGEP = GetElementPtrInst::Create(NewGEP, Idxs, Idxs+2, "tmp", GEP); - } - GEP->replaceAllUsesWith(NewGEP); - GEP->eraseFromParent(); - Changed = true; - } - - return Changed; -} - -void GEPSplitter::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); -} diff --git a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp index a0123f5..efecb97 100644 --- a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp @@ -63,50 +63,48 @@ static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true)); namespace { struct Expression { uint32_t opcode; - const Type* type; + const Type *type; SmallVector<uint32_t, 4> varargs; - Expression() { } - Expression(uint32_t o) : opcode(o) { } + Expression(uint32_t o = ~2U) : opcode(o) { } bool operator==(const Expression &other) const { if (opcode != other.opcode) return false; - else if (opcode == ~0U || opcode == ~1U) + if (opcode == ~0U || opcode == ~1U) return true; - else if (type != other.type) + if (type != other.type) return false; - else if (varargs != other.varargs) + if (varargs != other.varargs) return false; return true; } }; class ValueTable { - private: - DenseMap<Value*, uint32_t> valueNumbering; - DenseMap<Expression, uint32_t> expressionNumbering; - AliasAnalysis* AA; - MemoryDependenceAnalysis* MD; - DominatorTree* DT; - - uint32_t nextValueNumber; - - Expression create_expression(Instruction* I); - uint32_t lookup_or_add_call(CallInst* C); - public: - ValueTable() : nextValueNumber(1) { } - uint32_t lookup_or_add(Value *V); - uint32_t lookup(Value *V) const; - void add(Value *V, uint32_t num); - void clear(); - void erase(Value *v); - void setAliasAnalysis(AliasAnalysis* A) { AA = A; } - AliasAnalysis *getAliasAnalysis() const { return AA; } - void setMemDep(MemoryDependenceAnalysis* M) { MD = M; } - void setDomTree(DominatorTree* D) { DT = D; } - uint32_t getNextUnusedValueNumber() { return nextValueNumber; } - void verifyRemoved(const Value *) const; + DenseMap<Value*, uint32_t> valueNumbering; + DenseMap<Expression, uint32_t> expressionNumbering; + AliasAnalysis *AA; + MemoryDependenceAnalysis *MD; + DominatorTree *DT; + + uint32_t nextValueNumber; + + Expression create_expression(Instruction* I); + uint32_t lookup_or_add_call(CallInst* C); + public: + ValueTable() : nextValueNumber(1) { } + uint32_t lookup_or_add(Value *V); + uint32_t lookup(Value *V) const; + void add(Value *V, uint32_t num); + void clear(); + void erase(Value *v); + void setAliasAnalysis(AliasAnalysis* A) { AA = A; } + AliasAnalysis *getAliasAnalysis() const { return AA; } + void setMemDep(MemoryDependenceAnalysis* M) { MD = M; } + void setDomTree(DominatorTree* D) { DT = D; } + uint32_t getNextUnusedValueNumber() { return nextValueNumber; } + void verifyRemoved(const Value *) const; }; } @@ -364,14 +362,14 @@ uint32_t ValueTable::lookup(Value *V) const { return VI->second; } -/// clear - Remove all entries from the ValueTable +/// clear - Remove all entries from the ValueTable. void ValueTable::clear() { valueNumbering.clear(); expressionNumbering.clear(); nextValueNumber = 1; } -/// erase - Remove a value from the value numbering +/// erase - Remove a value from the value numbering. 
void ValueTable::erase(Value *V) { valueNumbering.erase(V); } @@ -392,20 +390,11 @@ void ValueTable::verifyRemoved(const Value *V) const { namespace { class GVN : public FunctionPass { - bool runOnFunction(Function &F); - public: - static char ID; // Pass identification, replacement for typeid - explicit GVN(bool noloads = false) - : FunctionPass(ID), NoLoads(noloads), MD(0) { - initializeGVNPass(*PassRegistry::getPassRegistry()); - } - - private: bool NoLoads; MemoryDependenceAnalysis *MD; DominatorTree *DT; - const TargetData* TD; - + const TargetData *TD; + ValueTable VN; /// LeaderTable - A mapping from value numbers to lists of Value*'s that @@ -418,17 +407,39 @@ namespace { DenseMap<uint32_t, LeaderTableEntry> LeaderTable; BumpPtrAllocator TableAllocator; + SmallVector<Instruction*, 8> InstrsToErase; + public: + static char ID; // Pass identification, replacement for typeid + explicit GVN(bool noloads = false) + : FunctionPass(ID), NoLoads(noloads), MD(0) { + initializeGVNPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F); + + /// markInstructionForDeletion - This removes the specified instruction from + /// our various maps and marks it for deletion. + void markInstructionForDeletion(Instruction *I) { + VN.erase(I); + InstrsToErase.push_back(I); + } + + const TargetData *getTargetData() const { return TD; } + DominatorTree &getDominatorTree() const { return *DT; } + AliasAnalysis *getAliasAnalysis() const { return VN.getAliasAnalysis(); } + MemoryDependenceAnalysis &getMemDep() const { return *MD; } + private: /// addToLeaderTable - Push a new Value to the LeaderTable onto the list for /// its value number. void addToLeaderTable(uint32_t N, Value *V, BasicBlock *BB) { - LeaderTableEntry& Curr = LeaderTable[N]; + LeaderTableEntry &Curr = LeaderTable[N]; if (!Curr.Val) { Curr.Val = V; Curr.BB = BB; return; } - LeaderTableEntry* Node = TableAllocator.Allocate<LeaderTableEntry>(); + LeaderTableEntry *Node = TableAllocator.Allocate<LeaderTableEntry>(); Node->Val = V; Node->BB = BB; Node->Next = Curr.Next; @@ -474,19 +485,17 @@ namespace { AU.addPreserved<DominatorTree>(); AU.addPreserved<AliasAnalysis>(); } + // Helper fuctions // FIXME: eliminate or document these better - bool processLoad(LoadInst* L, - SmallVectorImpl<Instruction*> &toErase); - bool processInstruction(Instruction *I, - SmallVectorImpl<Instruction*> &toErase); - bool processNonLocalLoad(LoadInst* L, - SmallVectorImpl<Instruction*> &toErase); + bool processLoad(LoadInst *L); + bool processInstruction(Instruction *I); + bool processNonLocalLoad(LoadInst *L); bool processBlock(BasicBlock *BB); - void dump(DenseMap<uint32_t, Value*>& d); + void dump(DenseMap<uint32_t, Value*> &d); bool iterateOnFunction(Function &F); - bool performPRE(Function& F); + bool performPRE(Function &F); Value *findLeader(BasicBlock *BB, uint32_t num); void cleanupGlobalSets(); void verifyRemoved(const Instruction *I) const; @@ -629,17 +638,17 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal, if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD)) return 0; + // If this is already the right type, just return it. const Type *StoredValTy = StoredVal->getType(); uint64_t StoreSize = TD.getTypeStoreSizeInBits(StoredValTy); - uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy); + uint64_t LoadSize = TD.getTypeStoreSizeInBits(LoadedTy); // If the store and reload are the same size, we can always reuse it. 
if (StoreSize == LoadSize) { - if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy()) { - // Pointer to Pointer -> use bitcast. + // Pointer to Pointer -> use bitcast. + if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy()) return new BitCastInst(StoredVal, LoadedTy, "", InsertPt); - } // Convert source pointers to integers, which can be bitcast. if (StoredValTy->isPointerTy()) { @@ -796,6 +805,36 @@ static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr, StorePtr, StoreSize, TD); } +/// AnalyzeLoadFromClobberingLoad - This function is called when we have a +/// memdep query of a load that ends up being clobbered by another load. See if +/// the other load can feed into the second load. +static int AnalyzeLoadFromClobberingLoad(const Type *LoadTy, Value *LoadPtr, + LoadInst *DepLI, const TargetData &TD){ + // Cannot handle reading from store of first-class aggregate yet. + if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy()) + return -1; + + Value *DepPtr = DepLI->getPointerOperand(); + uint64_t DepSize = TD.getTypeSizeInBits(DepLI->getType()); + int R = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, TD); + if (R != -1) return R; + + // If we have a load/load clobber an DepLI can be widened to cover this load, + // then we should widen it! + int64_t LoadOffs = 0; + const Value *LoadBase = + GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, TD); + unsigned LoadSize = TD.getTypeStoreSize(LoadTy); + + unsigned Size = MemoryDependenceAnalysis:: + getLoadLoadClobberFullWidthSize(LoadBase, LoadOffs, LoadSize, DepLI, TD); + if (Size == 0) return -1; + + return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size*8, TD); +} + + + static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr, MemIntrinsic *MI, const TargetData &TD) { @@ -843,9 +882,9 @@ static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr, /// GetStoreValueForLoad - This function is called when we have a /// memdep query of a load that ends up being a clobbering store. This means -/// that the store *may* provide bits used by the load but we can't be sure -/// because the pointers don't mustalias. Check this case to see if there is -/// anything more we can do before we give up. +/// that the store provides bits used by the load but we the pointers don't +/// mustalias. Check this case to see if there is anything more we can do +/// before we give up. static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset, const Type *LoadTy, Instruction *InsertPt, const TargetData &TD){ @@ -881,6 +920,69 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset, return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD); } +/// GetStoreValueForLoad - This function is called when we have a +/// memdep query of a load that ends up being a clobbering load. This means +/// that the load *may* provide bits used by the load but we can't be sure +/// because the pointers don't mustalias. Check this case to see if there is +/// anything more we can do before we give up. +static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, + const Type *LoadTy, Instruction *InsertPt, + GVN &gvn) { + const TargetData &TD = *gvn.getTargetData(); + // If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to + // widen SrcVal out to a larger load. 
+ unsigned SrcValSize = TD.getTypeStoreSize(SrcVal->getType()); + unsigned LoadSize = TD.getTypeStoreSize(LoadTy); + if (Offset+LoadSize > SrcValSize) { + assert(!SrcVal->isVolatile() && "Cannot widen volatile load!"); + assert(isa<IntegerType>(SrcVal->getType())&&"Can't widen non-integer load"); + // If we have a load/load clobber an DepLI can be widened to cover this + // load, then we should widen it to the next power of 2 size big enough! + unsigned NewLoadSize = Offset+LoadSize; + if (!isPowerOf2_32(NewLoadSize)) + NewLoadSize = NextPowerOf2(NewLoadSize); + + Value *PtrVal = SrcVal->getPointerOperand(); + + // Insert the new load after the old load. This ensures that subsequent + // memdep queries will find the new load. We can't easily remove the old + // load completely because it is already in the value numbering table. + IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal)); + const Type *DestPTy = + IntegerType::get(LoadTy->getContext(), NewLoadSize*8); + DestPTy = PointerType::get(DestPTy, + cast<PointerType>(PtrVal->getType())->getAddressSpace()); + + PtrVal = Builder.CreateBitCast(PtrVal, DestPTy); + LoadInst *NewLoad = Builder.CreateLoad(PtrVal); + NewLoad->takeName(SrcVal); + NewLoad->setAlignment(SrcVal->getAlignment()); + + DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n"); + DEBUG(dbgs() << "TO: " << *NewLoad << "\n"); + + // Replace uses of the original load with the wider load. On a big endian + // system, we need to shift down to get the relevant bits. + Value *RV = NewLoad; + if (TD.isBigEndian()) + RV = Builder.CreateLShr(RV, + NewLoadSize*8-SrcVal->getType()->getPrimitiveSizeInBits()); + RV = Builder.CreateTrunc(RV, SrcVal->getType()); + SrcVal->replaceAllUsesWith(RV); + + // We would like to use gvn.markInstructionForDeletion here, but we can't + // because the load is already memoized into the leader map table that GVN + // tracks. It is potentially possible to remove the load from the table, + // but then there all of the operations based on it would need to be + // rehashed. Just leave the dead load around. + gvn.getMemDep().removeInstruction(SrcVal); + SrcVal = NewLoad; + } + + return GetStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, TD); +} + + /// GetMemInstValueForLoad - This function is called when we have a /// memdep query of a load that ends up being a clobbering mem intrinsic. static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, @@ -943,11 +1045,12 @@ struct AvailableValueInBlock { BasicBlock *BB; enum ValType { SimpleVal, // A simple offsetted value that is accessed. + LoadVal, // A value produced by a load. MemIntrin // A memory intrinsic which is loaded from. }; /// V - The value that is live out of the block. - PointerIntPair<Value *, 1, ValType> Val; + PointerIntPair<Value *, 2, ValType> Val; /// Offset - The byte offset in Val that is interesting for the load query. 
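GetLoadValueForLoad above widens the earlier load to the next power-of-two size that covers both accesses and, on big-endian targets, shifts the widened value down before truncating so the original load's users still see the same bits. A small standalone model of that size and bit arithmetic; nextPow2 here is a local stand-in for the LLVM helper used in the patch:

#include <cassert>
#include <cstdint>

// Round a byte count up to a power of two, as the widening path does overall.
static uint64_t nextPow2(uint64_t v) {
  uint64_t p = 1;
  while (p < v) p <<= 1;
  return p;
}

// Recover the original narrow load's value from the wider load. On little-endian
// targets the original bytes are the low bits, so truncation is enough; on
// big-endian targets they sit in the high bits, hence the shift before the
// truncate -- the same adjustment made with CreateLShr/CreateTrunc above.
static uint32_t narrowFromWide(uint64_t wide, unsigned wideBytes,
                               unsigned narrowBytes, bool bigEndian) {
  if (bigEndian)
    wide >>= (wideBytes - narrowBytes) * 8;
  return uint32_t(wide);   // truncate to the narrow width (4 bytes here)
}

int main() {
  assert(nextPow2(3) == 4 && nextPow2(5) == 8);  // e.g. offset 1 + 4-byte load -> 8
  // A 4-byte load widened to 8 bytes; the original 4 bytes held 0x11223344.
  assert(narrowFromWide(0xAABBCCDD11223344ull, 8, 4, /*bigEndian=*/false) == 0x11223344u);
  assert(narrowFromWide(0x11223344AABBCCDDull, 8, 4, /*bigEndian=*/true)  == 0x11223344u);
}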
unsigned Offset; @@ -972,37 +1075,69 @@ struct AvailableValueInBlock { return Res; } + static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI, + unsigned Offset = 0) { + AvailableValueInBlock Res; + Res.BB = BB; + Res.Val.setPointer(LI); + Res.Val.setInt(LoadVal); + Res.Offset = Offset; + return Res; + } + bool isSimpleValue() const { return Val.getInt() == SimpleVal; } + bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; } + bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; } + Value *getSimpleValue() const { assert(isSimpleValue() && "Wrong accessor"); return Val.getPointer(); } + LoadInst *getCoercedLoadValue() const { + assert(isCoercedLoadValue() && "Wrong accessor"); + return cast<LoadInst>(Val.getPointer()); + } + MemIntrinsic *getMemIntrinValue() const { - assert(!isSimpleValue() && "Wrong accessor"); + assert(isMemIntrinValue() && "Wrong accessor"); return cast<MemIntrinsic>(Val.getPointer()); } /// MaterializeAdjustedValue - Emit code into this block to adjust the value /// defined here to the specified type. This handles various coercion cases. - Value *MaterializeAdjustedValue(const Type *LoadTy, - const TargetData *TD) const { + Value *MaterializeAdjustedValue(const Type *LoadTy, GVN &gvn) const { Value *Res; if (isSimpleValue()) { Res = getSimpleValue(); if (Res->getType() != LoadTy) { + const TargetData *TD = gvn.getTargetData(); assert(TD && "Need target data to handle type mismatch case"); Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(), *TD); - DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " " + DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " " << *getSimpleValue() << '\n' << *Res << '\n' << "\n\n\n"); } + } else if (isCoercedLoadValue()) { + LoadInst *Load = getCoercedLoadValue(); + if (Load->getType() == LoadTy && Offset == 0) { + Res = Load; + } else { + Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(), + gvn); + + DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " " + << *getCoercedLoadValue() << '\n' + << *Res << '\n' << "\n\n\n"); + } } else { + const TargetData *TD = gvn.getTargetData(); + assert(TD && "Need target data to handle type mismatch case"); Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset, LoadTy, BB->getTerminator(), *TD); - DEBUG(errs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset + DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset << " " << *getMemIntrinValue() << '\n' << *Res << '\n' << "\n\n\n"); } @@ -1010,21 +1145,20 @@ struct AvailableValueInBlock { } }; -} +} // end anonymous namespace /// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock, /// construct SSA form, allowing us to eliminate LI. This returns the value /// that should be used at LI's definition site. static Value *ConstructSSAForLoadSet(LoadInst *LI, SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock, - const TargetData *TD, - const DominatorTree &DT, - AliasAnalysis *AA) { + GVN &gvn) { // Check for the fully redundant, dominating load case. In this case, we can // just use the dominating value directly. if (ValuesPerBlock.size() == 1 && - DT.properlyDominates(ValuesPerBlock[0].BB, LI->getParent())) - return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), TD); + gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB, + LI->getParent())) + return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), gvn); // Otherwise, we have to construct SSA form. 
SmallVector<PHINode*, 8> NewPHIs; @@ -1040,14 +1174,16 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, if (SSAUpdate.HasValueForBlock(BB)) continue; - SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, TD)); + SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, gvn)); } // Perform PHI construction. Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent()); // If new PHI nodes were created, notify alias analysis. - if (V->getType()->isPointerTy()) + if (V->getType()->isPointerTy()) { + AliasAnalysis *AA = gvn.getAliasAnalysis(); + for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) AA->copyValue(LI, NewPHIs[i]); @@ -1059,6 +1195,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, for (unsigned ii = 0, ee = P->getNumIncomingValues(); ii != ee; ++ii) AA->addEscapingUse(P->getOperandUse(2*ii)); } + } return V; } @@ -1071,8 +1208,7 @@ static bool isLifetimeStart(const Instruction *Inst) { /// processNonLocalLoad - Attempt to eliminate a load whose dependencies are /// non-local by performing PHI construction. -bool GVN::processNonLocalLoad(LoadInst *LI, - SmallVectorImpl<Instruction*> &toErase) { +bool GVN::processNonLocalLoad(LoadInst *LI) { // Find the non-local dependencies of the load. SmallVector<NonLocalDepResult, 64> Deps; AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI); @@ -1088,7 +1224,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // If we had a phi translation failure, we'll have a single entry which is a // clobber in the current block. Reject this early. - if (Deps.size() == 1 && Deps[0].getResult().isClobber()) { + if (Deps.size() == 1 && Deps[0].getResult().isClobber() && + Deps[0].getResult().getInst()->getParent() == LI->getParent()) { DEBUG( dbgs() << "GVN: non-local load "; WriteAsOperand(dbgs(), LI); @@ -1129,6 +1266,26 @@ bool GVN::processNonLocalLoad(LoadInst *LI, } } } + + // Check to see if we have something like this: + // load i32* P + // load i8* (P+1) + // if we have this, replace the later with an extraction from the former. + if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInfo.getInst())) { + // If this is a clobber and L is the first instruction in its block, then + // we have the first instruction in the entry block. + if (DepLI != LI && Address && TD) { + int Offset = AnalyzeLoadFromClobberingLoad(LI->getType(), + LI->getPointerOperand(), + DepLI, *TD); + + if (Offset != -1) { + ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB,DepLI, + Offset)); + continue; + } + } + } // If the clobbering value is a memset/memcpy/memmove, see if we can // forward a value on from it. @@ -1187,7 +1344,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI, continue; } } - ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, LD)); + ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB, LD)); continue; } @@ -1206,16 +1363,14 @@ bool GVN::processNonLocalLoad(LoadInst *LI, DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n'); // Perform PHI construction. - Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT, - VN.getAliasAnalysis()); + Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this); LI->replaceAllUsesWith(V); if (isa<PHINode>(V)) V->takeName(LI); if (V->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(V); - VN.erase(LI); - toErase.push_back(LI); + markInstructionForDeletion(LI); ++NumGVNLoad; return true; } @@ -1429,22 +1584,20 @@ bool GVN::processNonLocalLoad(LoadInst *LI, } // Perform PHI construction. 
- Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT, - VN.getAliasAnalysis()); + Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this); LI->replaceAllUsesWith(V); if (isa<PHINode>(V)) V->takeName(LI); if (V->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(V); - VN.erase(LI); - toErase.push_back(LI); + markInstructionForDeletion(LI); ++NumPRELoad; return true; } /// processLoad - Attempt to eliminate a load, first by eliminating it /// locally, and then attempting non-local elimination if that fails. -bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { +bool GVN::processLoad(LoadInst *L) { if (!MD) return false; @@ -1454,8 +1607,9 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { // ... to a pointer that has been loaded from before... MemDepResult Dep = MD->getDependency(L); - // If the value isn't available, don't do anything! - if (Dep.isClobber()) { + // If we have a clobber and target data is around, see if this is a clobber + // that we can fix up through code synthesis. + if (Dep.isClobber() && TD) { // Check to see if we have something like this: // store i32 123, i32* %P // %A = bitcast i32* %P to i8* @@ -1467,26 +1621,40 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { // completely covers this load. This sort of thing can happen in bitfield // access code. Value *AvailVal = 0; - if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst())) - if (TD) { - int Offset = AnalyzeLoadFromClobberingStore(L->getType(), - L->getPointerOperand(), - DepSI, *TD); - if (Offset != -1) - AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset, - L->getType(), L, *TD); - } + if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst())) { + int Offset = AnalyzeLoadFromClobberingStore(L->getType(), + L->getPointerOperand(), + DepSI, *TD); + if (Offset != -1) + AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset, + L->getType(), L, *TD); + } + + // Check to see if we have something like this: + // load i32* P + // load i8* (P+1) + // if we have this, replace the later with an extraction from the former. + if (LoadInst *DepLI = dyn_cast<LoadInst>(Dep.getInst())) { + // If this is a clobber and L is the first instruction in its block, then + // we have the first instruction in the entry block. + if (DepLI == L) + return false; + + int Offset = AnalyzeLoadFromClobberingLoad(L->getType(), + L->getPointerOperand(), + DepLI, *TD); + if (Offset != -1) + AvailVal = GetLoadValueForLoad(DepLI, Offset, L->getType(), L, *this); + } // If the clobbering value is a memset/memcpy/memmove, see if we can forward // a value on from it. 
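The clobber handling added to GVN::processLoad above targets the pattern sketched in its comment: a wide store that completely covers a later narrower load ("store i32 ... load i8"). At the source level the shape can look like the snippet below; exactly what IR a compiler emits for it is an assumption, the point is only the wide-store/narrow-load overlap:

#include <cstdio>

// A 4-byte store followed by a 1-byte load of the same memory: the narrow load
// is completely covered by the store, so the value can be forwarded instead of
// treating the store as an opaque clobber.
unsigned lowByte(unsigned *p) {
  *p = 0x11223344u;
  return *reinterpret_cast<unsigned char *>(p);
}

int main() {
  unsigned v = 0;
  std::printf("0x%02x\n", lowByte(&v));   // 0x44 on a little-endian target
}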
if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) { - if (TD) { - int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(), - L->getPointerOperand(), - DepMI, *TD); - if (Offset != -1) - AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L,*TD); - } + int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(), + L->getPointerOperand(), + DepMI, *TD); + if (Offset != -1) + AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L, *TD); } if (AvailVal) { @@ -1497,14 +1665,16 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { L->replaceAllUsesWith(AvailVal); if (AvailVal->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(AvailVal); - VN.erase(L); - toErase.push_back(L); + markInstructionForDeletion(L); ++NumGVNLoad; return true; } - + } + + // If the value isn't available, don't do anything! + if (Dep.isClobber()) { DEBUG( - // fast print dep, using operator<< on instruction would be too slow + // fast print dep, using operator<< on instruction is too slow. dbgs() << "GVN: load "; WriteAsOperand(dbgs(), L); Instruction *I = Dep.getInst(); @@ -1515,7 +1685,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { // If it is defined in another block, try harder. if (Dep.isNonLocal()) - return processNonLocalLoad(L, toErase); + return processNonLocalLoad(L); Instruction *DepInst = Dep.getInst(); if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) { @@ -1542,8 +1712,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { L->replaceAllUsesWith(StoredVal); if (StoredVal->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(StoredVal); - VN.erase(L); - toErase.push_back(L); + markInstructionForDeletion(L); ++NumGVNLoad; return true; } @@ -1556,7 +1725,8 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { // (depending on its type). if (DepLI->getType() != L->getType()) { if (TD) { - AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L,*TD); + AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), + L, *TD); if (AvailableVal == 0) return false; @@ -1571,8 +1741,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { L->replaceAllUsesWith(AvailableVal); if (DepLI->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(DepLI); - VN.erase(L); - toErase.push_back(L); + markInstructionForDeletion(L); ++NumGVNLoad; return true; } @@ -1582,19 +1751,17 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { // intervening stores, for example. if (isa<AllocaInst>(DepInst) || isMalloc(DepInst)) { L->replaceAllUsesWith(UndefValue::get(L->getType())); - VN.erase(L); - toErase.push_back(L); + markInstructionForDeletion(L); ++NumGVNLoad; return true; } // If this load occurs either right after a lifetime begin, // then the loaded value is undefined. 
- if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(DepInst)) { + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(DepInst)) { if (II->getIntrinsicID() == Intrinsic::lifetime_start) { L->replaceAllUsesWith(UndefValue::get(L->getType())); - VN.erase(L); - toErase.push_back(L); + markInstructionForDeletion(L); ++NumGVNLoad; return true; } @@ -1634,8 +1801,7 @@ Value *GVN::findLeader(BasicBlock *BB, uint32_t num) { /// processInstruction - When calculating availability, handle an instruction /// by inserting it into the appropriate sets -bool GVN::processInstruction(Instruction *I, - SmallVectorImpl<Instruction*> &toErase) { +bool GVN::processInstruction(Instruction *I) { // Ignore dbg info intrinsics. if (isa<DbgInfoIntrinsic>(I)) return false; @@ -1648,20 +1814,17 @@ bool GVN::processInstruction(Instruction *I, I->replaceAllUsesWith(V); if (MD && V->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(V); - VN.erase(I); - toErase.push_back(I); + markInstructionForDeletion(I); return true; } if (LoadInst *LI = dyn_cast<LoadInst>(I)) { - bool Changed = processLoad(LI, toErase); - - if (!Changed) { - unsigned Num = VN.lookup_or_add(LI); - addToLeaderTable(Num, LI, LI->getParent()); - } + if (processLoad(LI)) + return true; - return Changed; + unsigned Num = VN.lookup_or_add(LI); + addToLeaderTable(Num, LI, LI->getParent()); + return false; } // For conditions branches, we can perform simple conditional propagation on @@ -1720,11 +1883,10 @@ bool GVN::processInstruction(Instruction *I, } // Remove it! - VN.erase(I); I->replaceAllUsesWith(repl); if (MD && repl->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(repl); - toErase.push_back(I); + markInstructionForDeletion(I); return true; } @@ -1781,35 +1943,36 @@ bool GVN::runOnFunction(Function& F) { bool GVN::processBlock(BasicBlock *BB) { - // FIXME: Kill off toErase by doing erasing eagerly in a helper function (and - // incrementing BI before processing an instruction). - SmallVector<Instruction*, 8> toErase; + // FIXME: Kill off InstrsToErase by doing erasing eagerly in a helper function + // (and incrementing BI before processing an instruction). + assert(InstrsToErase.empty() && + "We expect InstrsToErase to be empty across iterations"); bool ChangedFunction = false; for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { - ChangedFunction |= processInstruction(BI, toErase); - if (toErase.empty()) { + ChangedFunction |= processInstruction(BI); + if (InstrsToErase.empty()) { ++BI; continue; } // If we need some instructions deleted, do it now. - NumGVNInstr += toErase.size(); + NumGVNInstr += InstrsToErase.size(); // Avoid iterator invalidation. bool AtStart = BI == BB->begin(); if (!AtStart) --BI; - for (SmallVector<Instruction*, 4>::iterator I = toErase.begin(), - E = toErase.end(); I != E; ++I) { + for (SmallVector<Instruction*, 4>::iterator I = InstrsToErase.begin(), + E = InstrsToErase.end(); I != E; ++I) { DEBUG(dbgs() << "GVN removed: " << **I << '\n'); if (MD) MD->removeInstruction(*I); (*I)->eraseFromParent(); DEBUG(verifyRemoved(*I)); } - toErase.clear(); + InstrsToErase.clear(); if (AtStart) BI = BB->begin(); @@ -1944,11 +2107,11 @@ bool GVN::performPRE(Function &F) { addToLeaderTable(ValNo, PREInstr, PREPred); // Create a PHI to make the value available in this block. 
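performPRE, whose body this part of the GVN diff is adjusting, removes partial redundancies: an expression already available on some predecessors is recomputed in the predecessors where it is missing, and the copies are merged with a phi. A source-level before/after sketch of the intended effect (illustrative C++, not the pass's own code):

// Before: a + b is computed on the 'c' path and again after the join, so the
// second computation is partially redundant.
int before(int a, int b, bool c) {
  int t = 0;
  if (c)
    t = a + b;
  int x = a + b;
  return t + x;
}

// After PRE: the expression is inserted on the path where it was missing and
// the copies are merged (the phi), so no path computes a + b twice.
int after(int a, int b, bool c) {
  int t = 0, x;
  if (c) {
    t = a + b;
    x = t;
  } else {
    x = a + b;
  }
  return t + x;
}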
- PHINode* Phi = PHINode::Create(CurInst->getType(), + pred_iterator PB = pred_begin(CurrentBlock), PE = pred_end(CurrentBlock); + PHINode* Phi = PHINode::Create(CurInst->getType(), std::distance(PB, PE), CurInst->getName() + ".pre-phi", CurrentBlock->begin()); - for (pred_iterator PI = pred_begin(CurrentBlock), - PE = pred_end(CurrentBlock); PI != PE; ++PI) { + for (pred_iterator PI = PB; PI != PE; ++PI) { BasicBlock *P = *PI; Phi->addIncoming(predMap[P], P); } diff --git a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index 0fb6798..09d569a 100644 --- a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -73,6 +73,7 @@ namespace { LoopInfo *LI; ScalarEvolution *SE; DominatorTree *DT; + SmallVector<WeakVH, 16> DeadInsts; bool Changed; public: @@ -98,6 +99,7 @@ namespace { } private: + bool isValidRewrite(Value *FromVal, Value *ToVal); void EliminateIVComparisons(); void EliminateIVRemainders(); @@ -134,6 +136,53 @@ Pass *llvm::createIndVarSimplifyPass() { return new IndVarSimplify(); } +/// isValidRewrite - Return true if the SCEV expansion generated by the +/// rewriter can replace the original value. SCEV guarantees that it +/// produces the same value, but the way it is produced may be illegal IR. +/// Ideally, this function will only be called for verification. +bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) { + // If an SCEV expression subsumed multiple pointers, its expansion could + // reassociate the GEP changing the base pointer. This is illegal because the + // final address produced by a GEP chain must be inbounds relative to its + // underlying object. Otherwise basic alias analysis, among other things, + // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid + // producing an expression involving multiple pointers. Until then, we must + // bail out here. + // + // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject + // because it understands lcssa phis while SCEV does not. + Value *FromPtr = FromVal; + Value *ToPtr = ToVal; + if (GEPOperator *GEP = dyn_cast<GEPOperator>(FromVal)) { + FromPtr = GEP->getPointerOperand(); + } + if (GEPOperator *GEP = dyn_cast<GEPOperator>(ToVal)) { + ToPtr = GEP->getPointerOperand(); + } + if (FromPtr != FromVal || ToPtr != ToVal) { + // Quickly check the common case + if (FromPtr == ToPtr) + return true; + + // SCEV may have rewritten an expression that produces the GEP's pointer + // operand. That's ok as long as the pointer operand has the same base + // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the + // base of a recurrence. This handles the case in which SCEV expansion + // converts a pointer type recurrence into a nonrecurrent pointer base + // indexed by an integer recurrence. + const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr)); + const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr)); + if (FromBase == ToBase) + return true; + + DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " + << *FromBase << " != " << *ToBase << "\n"); + + return false; + } + return true; +} + /// LinearFunctionTestReplace - This method rewrites the exit condition of the /// loop to be a canonical != comparison against the incremented loop induction /// variable. 
This pass is able to rewrite the exit tests of any loop where the @@ -226,7 +275,7 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L, // update the branch to use the new comparison; in the common case this // will make old comparison dead. BI->setCondition(Cond); - RecursivelyDeleteTriviallyDeadInstructions(OrigCond); + DeadInsts.push_back(OrigCond); ++NumLFTR; Changed = true; @@ -304,14 +353,18 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { if (!SE->isLoopInvariant(ExitValue, L)) continue; - Changed = true; - ++NumReplaced; - Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst); DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n' << " LoopVal = " << *Inst << "\n"); + if (!isValidRewrite(Inst, ExitVal)) { + DeadInsts.push_back(ExitVal); + continue; + } + Changed = true; + ++NumReplaced; + PN->setIncomingValue(i, ExitVal); // If this instruction is dead now, delete it. @@ -366,8 +419,6 @@ void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) { } void IndVarSimplify::EliminateIVComparisons() { - SmallVector<WeakVH, 16> DeadInsts; - // Look for ICmp users. for (IVUsers::iterator I = IU->begin(), E = IU->end(); I != E; ++I) { IVStrideUse &UI = *I; @@ -399,18 +450,9 @@ void IndVarSimplify::EliminateIVComparisons() { DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n'); DeadInsts.push_back(ICmp); } - - // Now that we're done iterating through lists, clean up any instructions - // which are now dead. - while (!DeadInsts.empty()) - if (Instruction *Inst = - dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val())) - RecursivelyDeleteTriviallyDeadInstructions(Inst); } void IndVarSimplify::EliminateIVRemainders() { - SmallVector<WeakVH, 16> DeadInsts; - // Look for SRem and URem users. for (IVUsers::iterator I = IU->begin(), E = IU->end(); I != E; ++I) { IVStrideUse &UI = *I; @@ -466,13 +508,6 @@ void IndVarSimplify::EliminateIVRemainders() { DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n'); DeadInsts.push_back(Rem); } - - // Now that we're done iterating through lists, clean up any instructions - // which are now dead. - while (!DeadInsts.empty()) - if (Instruction *Inst = - dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val())) - RecursivelyDeleteTriviallyDeadInstructions(Inst); } bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { @@ -491,6 +526,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { LI = &getAnalysis<LoopInfo>(); SE = &getAnalysis<ScalarEvolution>(); DT = &getAnalysis<DominatorTree>(); + DeadInsts.clear(); Changed = false; // If there are any floating-point recurrences, attempt to @@ -589,9 +625,21 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { ExitingBlock, BI, Rewriter); } - // Rewrite IV-derived expressions. Clears the rewriter cache. + // Rewrite IV-derived expressions. RewriteIVExpressions(L, Rewriter); + // Clear the rewriter cache, because values that are in the rewriter's cache + // can be deleted in the loop below, causing the AssertingVH in the cache to + // trigger. + Rewriter.clear(); + + // Now that we're done iterating through lists, clean up any instructions + // which are now dead. + while (!DeadInsts.empty()) + if (Instruction *Inst = + dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val())) + RecursivelyDeleteTriviallyDeadInstructions(Inst); + // The Rewriter may not be used from this point on. 
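The IndVarSimplify hunk above hoists DeadInsts into the pass and, crucially, drains it only after Rewriter.clear(), so the expander's cache never holds a handle to an already-deleted value. A toy model of that ordering, assuming nothing beyond the standard library (Inst, body, rewriterCache, and deadInsts are made-up stand-ins for the real data structures, with weak_ptr playing the role of WeakVH); the diff picks up again below with the preheader sinking code.

    #include <algorithm>
    #include <iostream>
    #include <memory>
    #include <vector>

    struct Inst {
      int id;
      ~Inst() { std::cout << "deleted instruction " << id << "\n"; }
    };

    int main() {
      std::vector<std::shared_ptr<Inst>> body;            // owns the "instructions"
      for (int i = 0; i < 3; ++i)
        body.push_back(std::make_shared<Inst>(Inst{i}));

      std::vector<std::shared_ptr<Inst>> rewriterCache{body[1]};    // cache keeps %1 alive
      std::vector<std::weak_ptr<Inst>> deadInsts{body[1], body[2]}; // queued deletions

      // Step 1: drop the cache first (the Rewriter.clear() in the hunk above),
      // so nothing in it can ever observe a dangling instruction.
      rewriterCache.clear();

      // Step 2: now it is safe to delete whatever is still around and dead.
      while (!deadInsts.empty()) {
        std::shared_ptr<Inst> I = deadInsts.back().lock();
        deadInsts.pop_back();
        if (!I)
          continue;                                       // already gone
        body.erase(std::remove(body.begin(), body.end(), I), body.end());
      }                                                   // I released here -> freed
      return 0;
    }
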
// Loop-invariant instructions in the preheader that aren't used in the @@ -632,7 +680,7 @@ static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) { if (!isSafe(*I, L, SE)) return false; return true; } - + // A cast is safe if its operand is. if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) return isSafe(C->getOperand(), L, SE); @@ -651,8 +699,6 @@ static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) { } void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { - SmallVector<WeakVH, 16> DeadInsts; - // Rewrite all induction variable expressions in terms of the canonical // induction variable. // @@ -705,6 +751,13 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { // Now expand it into actual Instructions and patch it into place. Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt); + DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n' + << " into = " << *NewVal << "\n"); + + if (!isValidRewrite(Op, NewVal)) { + DeadInsts.push_back(NewVal); + continue; + } // Inform ScalarEvolution that this value is changing. The change doesn't // affect its value, but it does potentially affect which use lists the // value will be on after the replacement, which affects ScalarEvolution's @@ -717,25 +770,13 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { NewVal->takeName(Op); User->replaceUsesOfWith(Op, NewVal); UI->setOperandValToReplace(NewVal); - DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n' - << " into = " << *NewVal << "\n"); + ++NumRemoved; Changed = true; // The old value may be dead now. DeadInsts.push_back(Op); } - - // Clear the rewriter cache, because values that are in the rewriter's cache - // can be deleted in the loop below, causing the AssertingVH in the cache to - // trigger. - Rewriter.clear(); - // Now that we're done iterating through lists, clean up any instructions - // which are now dead. - while (!DeadInsts.empty()) - if (Instruction *Inst = - dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val())) - RecursivelyDeleteTriviallyDeadInstructions(Inst); } /// If there's a single exit block, sink any loop-invariant values that @@ -859,7 +900,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { BinaryOperator *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge)); if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return; - + // If this is not an add of the PHI with a constantfp, or if the constant fp // is not an integer, bail out. ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1)); @@ -884,7 +925,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { if (Compare == 0 || !Compare->hasOneUse() || !isa<BranchInst>(Compare->use_back())) return; - + BranchInst *TheBr = cast<BranchInst>(Compare->use_back()); // We need to verify that the branch actually controls the iteration count @@ -896,8 +937,8 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { (L->contains(TheBr->getSuccessor(0)) && L->contains(TheBr->getSuccessor(1)))) return; - - + + // If it isn't a comparison with an integer-as-fp (the exit value), we can't // transform it. ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1)); @@ -905,7 +946,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { if (ExitValueVal == 0 || !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue)) return; - + // Find new predicate for integer comparison. 
CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE; switch (Compare->getPredicate()) { @@ -923,13 +964,13 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { case CmpInst::FCMP_OLE: case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break; } - + // We convert the floating point induction variable to a signed i32 value if // we can. This is only safe if the comparison will not overflow in a way // that won't be trapped by the integer equivalent operations. Check for this // now. // TODO: We could use i64 if it is native and the range requires it. - + // The start/stride/exit values must all fit in signed i32. if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue)) return; @@ -945,59 +986,59 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { if (InitValue >= ExitValue || NewPred == CmpInst::ICMP_SGT || NewPred == CmpInst::ICMP_SGE) return; - + uint32_t Range = uint32_t(ExitValue-InitValue); if (NewPred == CmpInst::ICMP_SLE) { // Normalize SLE -> SLT, check for infinite loop. if (++Range == 0) return; // Range overflows. } - + unsigned Leftover = Range % uint32_t(IncValue); - + // If this is an equality comparison, we require that the strided value // exactly land on the exit value, otherwise the IV condition will wrap // around and do things the fp IV wouldn't. if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && Leftover != 0) return; - + // If the stride would wrap around the i32 before exiting, we can't // transform the IV. if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue) return; - + } else { // If we have a negative stride, we require the init to be greater than the // exit value and an equality or greater than comparison. if (InitValue >= ExitValue || NewPred == CmpInst::ICMP_SLT || NewPred == CmpInst::ICMP_SLE) return; - + uint32_t Range = uint32_t(InitValue-ExitValue); if (NewPred == CmpInst::ICMP_SGE) { // Normalize SGE -> SGT, check for infinite loop. if (++Range == 0) return; // Range overflows. } - + unsigned Leftover = Range % uint32_t(-IncValue); - + // If this is an equality comparison, we require that the strided value // exactly land on the exit value, otherwise the IV condition will wrap // around and do things the fp IV wouldn't. if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && Leftover != 0) return; - + // If the stride would wrap around the i32 before exiting, we can't // transform the IV. if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue) return; } - + const IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext()); // Insert new integer induction variable. 
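The context above spells out the arithmetic that decides whether a floating-point IV can safely become an i32 IV; the hunk itself continues below with the new integer PHI. As a hedged restatement of the positive-stride checks only (the function name and the simplified predicate enum are inventions of this sketch, not the pass's types):

    #include <cassert>
    #include <cstdint>
    #include <limits>

    // Simplified predicate kinds; the real pass works with CmpInst predicates.
    enum Pred { SLT, SLE, EQ_OR_NE };

    // Positive-stride legality check, restated from the comments above.
    bool canUseI32ForPositiveStride(int64_t Init, int64_t Inc, int64_t Exit,
                                    Pred P) {
      auto fitsI32 = [](int64_t V) {
        return V >= std::numeric_limits<int32_t>::min() &&
               V <= std::numeric_limits<int32_t>::max();
      };
      // Start, stride and exit value must all fit in a signed i32.
      if (!fitsI32(Init) || !fitsI32(Inc) || !fitsI32(Exit) || Inc <= 0)
        return false;

      // A positive stride needs Init < Exit (and a "less-than"-style exit test).
      if (Init >= Exit)
        return false;

      uint32_t Range = uint32_t(Exit - Init);
      if (P == SLE && ++Range == 0)
        return false;                   // SLE -> SLT wrapped: infinite loop

      uint32_t Leftover = Range % uint32_t(Inc);

      // Equality exit tests must land exactly on the exit value...
      if (P == EQ_OR_NE && Leftover != 0)
        return false;

      // ...and the final increment must not wrap past the exit in i32.
      if (Leftover != 0 && int32_t(Exit + Inc) < Exit)
        return false;

      return true;
    }

    int main() {
      // for (float f = 0.0f; f < 100.0f; f += 3.0f): SLT exit, fine to convert.
      assert(canUseI32ForPositiveStride(0, 3, 100, SLT));
      // The same bounds with an equality exit would step over 100 and never stop.
      assert(!canUseI32ForPositiveStride(0, 3, 100, EQ_OR_NE));
      return 0;
    }
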
- PHINode *NewPHI = PHINode::Create(Int32Ty, PN->getName()+".int", PN); + PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN); NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue), PN->getIncomingBlock(IncomingEdge)); diff --git a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp index 90094a8..7168177 100644 --- a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -16,6 +16,7 @@ #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" #include "llvm/Pass.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LazyValueInfo.h" #include "llvm/Analysis/Loads.h" @@ -170,9 +171,9 @@ bool JumpThreading::runOnFunction(Function &F) { Changed = true; continue; } - + BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()); - + // Can't thread an unconditional jump, but if the block is "almost // empty", we can replace uses of it with uses of the successor and make // this dead. @@ -608,7 +609,7 @@ static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) { static bool hasAddressTakenAndUsed(BasicBlock *BB) { if (!BB->hasAddressTaken()) return false; - + // If the block has its address taken, it may be a tree of dead constants // hanging off of it. These shouldn't keep the block alive. BlockAddress *BA = BlockAddress::get(BB); @@ -668,6 +669,17 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { return false; // Must be an invoke. } + // Run constant folding to see if we can reduce the condition to a simple + // constant. + if (Instruction *I = dyn_cast<Instruction>(Condition)) { + Value *SimpleVal = ConstantFoldInstruction(I, TD); + if (SimpleVal) { + I->replaceAllUsesWith(SimpleVal); + I->eraseFromParent(); + Condition = SimpleVal; + } + } + // If the terminator is branching on an undef, we can pick any of the // successors to branch to. Let GetBestDestForJumpOnUndef decide. if (isa<UndefValue>(Condition)) { @@ -928,13 +940,14 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { array_pod_sort(AvailablePreds.begin(), AvailablePreds.end()); // Create a PHI node at the start of the block for the PRE'd load value. - PHINode *PN = PHINode::Create(LI->getType(), "", LoadBB->begin()); + pred_iterator PB = pred_begin(LoadBB), PE = pred_end(LoadBB); + PHINode *PN = PHINode::Create(LI->getType(), std::distance(PB, PE), "", + LoadBB->begin()); PN->takeName(LI); // Insert new entries into the PHI for each predecessor. A single block may // have multiple entries here. - for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB); PI != E; - ++PI) { + for (pred_iterator PI = PB; PI != PE; ++PI) { BasicBlock *P = *PI; AvailablePredsTy::iterator I = std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(), diff --git a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp index 0786793..93de9cf 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp @@ -445,7 +445,8 @@ void LICM::sink(Instruction &I) { // enough that we handle it as a special (more efficient) case. It is more // efficient to handle because there are no PHI nodes that need to be placed. if (ExitBlocks.size() == 1) { - if (!DT->dominates(I.getParent(), ExitBlocks[0])) { + if (!isa<DbgInfoIntrinsic>(I) && + !DT->dominates(I.getParent(), ExitBlocks[0])) { // Instruction is not used, just delete it. 
CurAST->deleteValue(&I); // If I has users in unreachable blocks, eliminate. @@ -742,30 +743,13 @@ void LICM::PromoteAliasSet(AliasSet &AS) { Preheader->getTerminator()); SSA.AddAvailableValue(Preheader, PreheaderLoad); - // Copy any value stored to or loaded from a must-alias of the pointer. - if (PreheaderLoad->getType()->isPointerTy()) { - Value *SomeValue; - if (LoadInst *LI = dyn_cast<LoadInst>(LoopUses[0])) - SomeValue = LI; - else - SomeValue = cast<StoreInst>(LoopUses[0])->getValueOperand(); - - CurAST->copyValue(SomeValue, PreheaderLoad); - } - // Rewrite all the loads in the loop and remember all the definitions from // stores in the loop. Promoter.run(LoopUses); - - // If the preheader load is itself a pointer, we need to tell alias analysis - // about the new pointer we created in the preheader block and about any PHI - // nodes that just got inserted. - if (PreheaderLoad->getType()->isPointerTy()) { - for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) - CurAST->copyValue(PreheaderLoad, NewPHIs[i]); - } - - // fwew, we're done! + + // If the SSAUpdater didn't use the load in the preheader, just zap it now. + if (PreheaderLoad->use_empty()) + PreheaderLoad->eraseFromParent(); } diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index f8ce214..1366231 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -81,7 +81,7 @@ namespace { bool processLoopStore(StoreInst *SI, const SCEV *BECount); bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount); - + bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize, unsigned StoreAlignment, Value *SplatValue, Instruction *TheStore, @@ -91,7 +91,7 @@ namespace { const SCEVAddRecExpr *StoreEv, const SCEVAddRecExpr *LoadEv, const SCEV *BECount); - + /// This transformation requires natural loop information & requires that /// loop preheaders be inserted into the CFG. /// @@ -134,50 +134,50 @@ Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognize(); } /// static void DeleteDeadInstruction(Instruction *I, ScalarEvolution &SE) { SmallVector<Instruction*, 32> NowDeadInsts; - + NowDeadInsts.push_back(I); - + // Before we touch this instruction, remove it from SE! do { Instruction *DeadInst = NowDeadInsts.pop_back_val(); - + // This instruction is dead, zap it, in stages. Start by removing it from // SCEV. SE.forgetValue(DeadInst); - + for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) { Value *Op = DeadInst->getOperand(op); DeadInst->setOperand(op, 0); - + // If this operand just became dead, add it to the NowDeadInsts list. if (!Op->use_empty()) continue; - + if (Instruction *OpI = dyn_cast<Instruction>(Op)) if (isInstructionTriviallyDead(OpI)) NowDeadInsts.push_back(OpI); } - + DeadInst->eraseFromParent(); - + } while (!NowDeadInsts.empty()); } bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { CurLoop = L; - + // The trip count of the loop must be analyzable. SE = &getAnalysis<ScalarEvolution>(); if (!SE->hasLoopInvariantBackedgeTakenCount(L)) return false; const SCEV *BECount = SE->getBackedgeTakenCount(L); if (isa<SCEVCouldNotCompute>(BECount)) return false; - + // If this loop executes exactly one time, then it should be peeled, not // optimized by this pass. if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount)) if (BECst->getValue()->getValue() == 0) return false; - + // We require target data for now. 
TD = getAnalysisIfAvailable<TargetData>(); if (TD == 0) return false; @@ -185,14 +185,14 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { DT = &getAnalysis<DominatorTree>(); LoopInfo &LI = getAnalysis<LoopInfo>(); TLI = &getAnalysis<TargetLibraryInfo>(); - + SmallVector<BasicBlock*, 8> ExitBlocks; CurLoop->getUniqueExitBlocks(ExitBlocks); DEBUG(dbgs() << "loop-idiom Scanning: F[" << L->getHeader()->getParent()->getName() << "] Loop %" << L->getHeader()->getName() << "\n"); - + bool MadeChange = false; // Scan all the blocks in the loop that are not in subloops. for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E; @@ -200,7 +200,7 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { // Ignore blocks in subloops. if (LI.getLoopFor(*BI) != CurLoop) continue; - + MadeChange |= runOnLoopBlock(*BI, BECount, ExitBlocks); } return MadeChange; @@ -217,7 +217,7 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount, for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) if (!DT->dominates(BB, ExitBlocks[i])) return false; - + bool MadeChange = false; for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { Instruction *Inst = I++; @@ -226,20 +226,20 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount, WeakVH InstPtr(I); if (!processLoopStore(SI, BECount)) continue; MadeChange = true; - + // If processing the store invalidated our iterator, start over from the // top of the block. if (InstPtr == 0) I = BB->begin(); continue; } - + // Look for memset instructions, which may be optimized to a larger memset. if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) { WeakVH InstPtr(I); if (!processLoopMemSet(MSI, BECount)) continue; MadeChange = true; - + // If processing the memset invalidated our iterator, start over from the // top of the block. if (InstPtr == 0) @@ -247,7 +247,7 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount, continue; } } - + return MadeChange; } @@ -258,12 +258,12 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { Value *StoredVal = SI->getValueOperand(); Value *StorePtr = SI->getPointerOperand(); - + // Reject stores that are so large that they overflow an unsigned. uint64_t SizeInBits = TD->getTypeSizeInBits(StoredVal->getType()); if ((SizeInBits & 7) || (SizeInBits >> 32) != 0) return false; - + // See if the pointer expression is an AddRec like {base,+,1} on the current // loop, which indicates a strided store. If we have something else, it's a // random store we can't handle. @@ -274,9 +274,9 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { // Check to see if the stride matches the size of the store. If so, then we // know that every byte is touched in the loop. - unsigned StoreSize = (unsigned)SizeInBits >> 3; + unsigned StoreSize = (unsigned)SizeInBits >> 3; const SCEVConstant *Stride = dyn_cast<SCEVConstant>(StoreEv->getOperand(1)); - + if (Stride == 0 || StoreSize != Stride->getValue()->getValue()) { // TODO: Could also handle negative stride here someday, that will require // the validity check in mayLoopAccessLocation to be updated though. 
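At the source level, the strided-store case that processLoopStore matches above boils down to the following before/after pair; the byte count handed to memset is (BECount + 1) * StoreSize, i.e. the trip count times the size of one store. The two helper functions are purely illustrative.

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    // Before: the loop shape the pass matches (constant splattable value,
    // stride equal to the store size, so every byte of the range is written).
    void zeroLoop(int32_t *p, std::size_t n) {
      for (std::size_t i = 0; i != n; ++i)
        p[i] = 0;                      // i32 store of a byte-wise value, stride 4
    }

    // After: the equivalent call placed in the loop preheader. StoreSize is 4
    // here, so the byte count is (BECount + 1) * StoreSize == n * 4.
    void zeroIdiom(int32_t *p, std::size_t n) {
      if (n != 0)
        std::memset(p, 0, n * sizeof(int32_t));
    }

    int main() {
      int32_t a[8], b[8];
      zeroLoop(a, 8);
      zeroIdiom(b, 8);
      return std::memcmp(a, b, sizeof(a));   // 0: both forms produce the same bytes
    }
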
@@ -285,7 +285,7 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { dbgs() << "NEGATIVE STRIDE: " << *SI << "\n"; dbgs() << "BB: " << *SI->getParent(); } - + return false; } @@ -319,9 +319,9 @@ processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) { // If we're not allowed to hack on memset, we fail. if (!TLI->has(LibFunc::memset)) return false; - + Value *Pointer = MSI->getDest(); - + // See if the pointer expression is an AddRec like {base,+,1} on the current // loop, which indicates a strided store. If we have something else, it's a // random store we can't handle. @@ -333,16 +333,16 @@ processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) { uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue(); if ((SizeInBytes >> 32) != 0) return false; - + // Check to see if the stride matches the size of the memset. If so, then we // know that every byte is touched in the loop. const SCEVConstant *Stride = dyn_cast<SCEVConstant>(Ev->getOperand(1)); - + // TODO: Could also handle negative stride here someday, that will require the // validity check in mayLoopAccessLocation to be updated though. if (Stride == 0 || MSI->getLength() != Stride->getValue()) return false; - + return processLoopStridedStore(Pointer, (unsigned)SizeInBytes, MSI->getAlignment(), MSI->getValue(), MSI, Ev, BECount); @@ -365,7 +365,7 @@ static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access, // to be exactly the size of the memset, which is (BECount+1)*StoreSize if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount)) AccessSize = (BECst->getValue()->getZExtValue()+1)*StoreSize; - + // TODO: For this to be really effective, we have to dive into the pointer // operand in the store. Store to &A[i] of 100 will always return may alias // with store of &A[100], we need to StoreLoc to be "A" with size of 100, @@ -394,12 +394,12 @@ static Constant *getMemSetPatternValue(Value *V, const TargetData &TD) { // that doesn't seem worthwhile. Constant *C = dyn_cast<Constant>(V); if (C == 0) return 0; - + // Only handle simple values that are a power of two bytes in size. uint64_t Size = TD.getTypeSizeInBits(V->getType()); if (Size == 0 || (Size & 7) || (Size & (Size-1))) return 0; - + // Don't care enough about darwin/ppc to implement this. if (TD.isBigEndian()) return 0; @@ -410,7 +410,7 @@ static Constant *getMemSetPatternValue(Value *V, const TargetData &TD) { // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see // if the top and bottom are the same (e.g. for vectors and large integers). if (Size > 16) return 0; - + // If the constant is exactly 16 bytes, just use it. if (Size == 16) return C; @@ -428,14 +428,14 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, unsigned StoreAlignment, Value *StoredVal, Instruction *TheStore, const SCEVAddRecExpr *Ev, const SCEV *BECount) { - + // If the stored value is a byte-wise value (like i32 -1), then it may be // turned into a memset of i8 -1, assuming that all the consecutive bytes // are stored. A store of i32 0x01020304 can never be turned into a memset, // but it can be turned into memset_pattern if the target supports it. Value *SplatValue = isBytewiseValue(StoredVal); Constant *PatternValue = 0; - + // If we're allowed to form a memset, and the stored value would be acceptable // for memset, use it. 
if (SplatValue && TLI->has(LibFunc::memset) && @@ -453,8 +453,8 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, // do anything with a 3-byte store, for example. return false; } - - + + // Okay, we have a strided store "p[i]" of a splattable value. We can turn // this into a memset in the loop preheader now if we want. However, this // would be unsafe to do if there is anything else in the loop that may read @@ -463,47 +463,47 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, CurLoop, BECount, StoreSize, getAnalysis<AliasAnalysis>(), TheStore)) return false; - + // Okay, everything looks good, insert the memset. BasicBlock *Preheader = CurLoop->getLoopPreheader(); - + IRBuilder<> Builder(Preheader->getTerminator()); - + // The trip count of the loop and the base pointer of the addrec SCEV is // guaranteed to be loop invariant, which means that it should dominate the // header. Just insert code for it in the preheader. SCEVExpander Expander(*SE); - + unsigned AddrSpace = cast<PointerType>(DestPtr->getType())->getAddressSpace(); - Value *BasePtr = + Value *BasePtr = Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace), Preheader->getTerminator()); - + // The # stored bytes is (BECount+1)*Size. Expand the trip count out to // pointer size if it isn't already. const Type *IntPtr = TD->getIntPtrType(DestPtr->getContext()); BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr); - + const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1), - true /*no unsigned overflow*/); + SCEV::FlagNUW); if (StoreSize != 1) NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize), - true /*no unsigned overflow*/); - - Value *NumBytes = + SCEV::FlagNUW); + + Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator()); - - Value *NewCall; + + CallInst *NewCall; if (SplatValue) NewCall = Builder.CreateMemSet(BasePtr, SplatValue,NumBytes,StoreAlignment); else { Module *M = TheStore->getParent()->getParent()->getParent(); Value *MSP = M->getOrInsertFunction("memset_pattern16", Builder.getVoidTy(), - Builder.getInt8PtrTy(), + Builder.getInt8PtrTy(), Builder.getInt8PtrTy(), IntPtr, (void*)0); - + // Otherwise we should form a memset_pattern16. PatternValue is known to be // an constant array of 16-bytes. Plop the value into a mergable global. GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true, @@ -514,11 +514,11 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, Value *PatternPtr = ConstantExpr::getBitCast(GV, Builder.getInt8PtrTy()); NewCall = Builder.CreateCall3(MSP, BasePtr, PatternPtr, NumBytes); } - + DEBUG(dbgs() << " Formed memset: " << *NewCall << "\n" << " from store to: " << *Ev << " at: " << *TheStore << "\n"); - (void)NewCall; - + NewCall->setDebugLoc(TheStore->getDebugLoc()); + // Okay, the memset has been formed. Zap the original store and anything that // feeds into it. DeleteDeadInstruction(TheStore, *SE); @@ -536,9 +536,9 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, // If we're not allowed to form memcpy, we fail. if (!TLI->has(LibFunc::memcpy)) return false; - + LoadInst *LI = cast<LoadInst>(SI->getValueOperand()); - + // Okay, we have a strided store "p[i]" of a loaded value. We can turn // this into a memcpy in the loop preheader now if we want. 
However, this // would be unsafe to do if there is anything else in the loop that may read @@ -555,49 +555,49 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, CurLoop, BECount, StoreSize, getAnalysis<AliasAnalysis>(), SI)) return false; - + // Okay, everything looks good, insert the memcpy. BasicBlock *Preheader = CurLoop->getLoopPreheader(); - + IRBuilder<> Builder(Preheader->getTerminator()); - + // The trip count of the loop and the base pointer of the addrec SCEV is // guaranteed to be loop invariant, which means that it should dominate the // header. Just insert code for it in the preheader. SCEVExpander Expander(*SE); - Value *LoadBasePtr = + Value *LoadBasePtr = Expander.expandCodeFor(LoadEv->getStart(), Builder.getInt8PtrTy(LI->getPointerAddressSpace()), Preheader->getTerminator()); - Value *StoreBasePtr = + Value *StoreBasePtr = Expander.expandCodeFor(StoreEv->getStart(), Builder.getInt8PtrTy(SI->getPointerAddressSpace()), Preheader->getTerminator()); - + // The # stored bytes is (BECount+1)*Size. Expand the trip count out to // pointer size if it isn't already. const Type *IntPtr = TD->getIntPtrType(SI->getContext()); BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr); - + const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1), - true /*no unsigned overflow*/); + SCEV::FlagNUW); if (StoreSize != 1) NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize), - true /*no unsigned overflow*/); - + SCEV::FlagNUW); + Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator()); - + Value *NewCall = Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes, std::min(SI->getAlignment(), LI->getAlignment())); - + DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n" << " from load ptr=" << *LoadEv << " at: " << *LI << "\n" << " from store ptr=" << *StoreEv << " at: " << *SI << "\n"); (void)NewCall; - + // Okay, the memset has been formed. Zap the original store and anything that // feeds into it. DeleteDeadInstruction(SI, *SE); diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp index 95e1578..47dced3 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp @@ -184,7 +184,11 @@ bool LoopRotate::rotateLoop(Loop *L) { // Now, this loop is suitable for rotation. BasicBlock *OrigPreheader = L->getLoopPreheader(); BasicBlock *OrigLatch = L->getLoopLatch(); - assert(OrigPreheader && OrigLatch && "Loop not in canonical form?"); + + // If the loop could not be converted to canonical form, it must have an + // indirectbr in it, just give up. + if (OrigPreheader == 0 || OrigLatch == 0) + return false; // Anything ScalarEvolution may know about this loop or the PHI nodes // in its header will soon be invalidated. @@ -322,7 +326,8 @@ bool LoopRotate::rotateLoop(Loop *L) { // We can fold the conditional branch in the preheader, this makes things // simpler. The first step is to remove the extra edge to the Exit block. Exit->removePredecessor(OrigPreheader, true /*preserve LCSSA*/); - BranchInst::Create(NewHeader, PHBI); + BranchInst *NewBI = BranchInst::Create(NewHeader, PHBI); + NewBI->setDebugLoc(PHBI->getDebugLoc()); PHBI->eraseFromParent(); // With our CFG finalized, update DomTree if it is available. 
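For the load/store counterpart handled by processLoopStoreOfLoopLoad above, the source-level picture is a plain element-by-element copy becoming a single memcpy in the preheader, with the byte count again computed as (BECount + 1) * StoreSize using no-unsigned-wrap (SCEV::FlagNUW) arithmetic. A hedged sketch, with illustrative function names; the rewrite is only legal because the pass has already proven the two regions cannot alias.

    #include <cstddef>
    #include <cstring>

    // Before: a strided load feeding a strided store of the same width.
    void copyLoop(char *dst, const char *src, std::size_t n) {
      for (std::size_t i = 0; i != n; ++i)
        dst[i] = src[i];
    }

    // After: one call in the preheader; n corresponds to (BECount + 1) * StoreSize
    // with StoreSize == 1. Using memcpy (not memmove) is only correct because the
    // pass has already checked that source and destination do not alias.
    void copyIdiom(char *dst, const char *src, std::size_t n) {
      if (n != 0)
        std::memcpy(dst, src, n);
    }

    int main() {
      char src[16] = "loop idiom", dst1[16] = {0}, dst2[16] = {0};
      copyLoop(dst1, src, sizeof(src));
      copyIdiom(dst2, src, sizeof(src));
      return std::memcmp(dst1, dst2, sizeof(dst1));  // 0: identical results
    }
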
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index ac4aea2..5abc790 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -253,7 +253,8 @@ static void DoInitialMatch(const SCEV *S, Loop *L, DoInitialMatch(AR->getStart(), L, Good, Bad, SE); DoInitialMatch(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0), AR->getStepRecurrence(SE), - AR->getLoop()), + // FIXME: AR->getNoWrapFlags() + AR->getLoop(), SCEV::FlagAnyWrap), L, Good, Bad, SE); return; } @@ -455,7 +456,10 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE, IgnoreSignificantBits); if (!Start) return 0; - return SE.getAddRecExpr(Start, Step, AR->getLoop()); + // FlagNW is independent of the start value, step direction, and is + // preserved with smaller magnitude steps. + // FIXME: AR->getNoWrapFlags(SCEV::FlagNW) + return SE.getAddRecExpr(Start, Step, AR->getLoop(), SCEV::FlagAnyWrap); } return 0; } @@ -520,7 +524,9 @@ static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) { SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end()); int64_t Result = ExtractImmediate(NewOps.front(), SE); if (Result != 0) - S = SE.getAddRecExpr(NewOps, AR->getLoop()); + S = SE.getAddRecExpr(NewOps, AR->getLoop(), + // FIXME: AR->getNoWrapFlags(SCEV::FlagNW) + SCEV::FlagAnyWrap); return Result; } return 0; @@ -545,7 +551,9 @@ static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) { SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end()); GlobalValue *Result = ExtractSymbol(NewOps.front(), SE); if (Result) - S = SE.getAddRecExpr(NewOps, AR->getLoop()); + S = SE.getAddRecExpr(NewOps, AR->getLoop(), + // FIXME: AR->getNoWrapFlags(SCEV::FlagNW) + SCEV::FlagAnyWrap); return Result; } return 0; @@ -564,9 +572,6 @@ static bool isAddressUse(Instruction *Inst, Value *OperandVal) { switch (II->getIntrinsicID()) { default: break; case Intrinsic::prefetch: - case Intrinsic::x86_sse2_loadu_dq: - case Intrinsic::x86_sse2_loadu_pd: - case Intrinsic::x86_sse_loadu_ps: case Intrinsic::x86_sse_storeu_ps: case Intrinsic::x86_sse2_storeu_pd: case Intrinsic::x86_sse2_storeu_dq: @@ -781,7 +786,7 @@ void Cost::RateFormula(const Formula &F, } } -/// Loose - Set this cost to a loosing value. +/// Loose - Set this cost to a losing value. void Cost::Loose() { NumRegs = ~0u; AddRecCost = ~0u; @@ -1483,7 +1488,7 @@ void LSRInstance::OptimizeShadowIV() { if (!C->getValue().isStrictlyPositive()) continue; /* Add new PHINode. */ - PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH); + PHINode *NewPH = PHINode::Create(DestTy, 2, "IV.S.", PH); /* create new increment. '++d' in above example. */ Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue()); @@ -1819,7 +1824,7 @@ LSRInstance::OptimizeLoopTermCond() { } } -/// reconcileNewOffset - Determine if the given use can accomodate a fixup +/// reconcileNewOffset - Determine if the given use can accommodate a fixup /// at the given offset and other details. If so, update the use and /// return true. 
bool @@ -2236,7 +2241,9 @@ static void CollectSubexprs(const SCEV *S, const SCEVConstant *C, if (!AR->getStart()->isZero()) { CollectSubexprs(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0), AR->getStepRecurrence(SE), - AR->getLoop()), + AR->getLoop(), + //FIXME: AR->getNoWrapFlags(SCEV::FlagNW) + SCEV::FlagAnyWrap), C, Ops, L, SE); CollectSubexprs(AR->getStart(), C, Ops, L, SE); return; @@ -3047,7 +3054,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() { } } -/// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call +/// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call /// FilterOutUndesirableDedicatedRegisters again, if necessary, now that /// we've done more filtering, as it may be able to find more formulae to /// eliminate. diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp index 80b263a..fef6bc3 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -43,7 +43,13 @@ namespace { class LoopUnroll : public LoopPass { public: static char ID; // Pass ID, replacement for typeid - LoopUnroll() : LoopPass(ID) { + LoopUnroll(int T = -1, int C = -1, int P = -1) : LoopPass(ID) { + CurrentThreshold = (T == -1) ? UnrollThreshold : unsigned(T); + CurrentCount = (C == -1) ? UnrollCount : unsigned(C); + CurrentAllowPartial = (P == -1) ? UnrollAllowPartial : (bool)P; + + UserThreshold = (T != -1) || (UnrollThreshold.getNumOccurrences() > 0); + initializeLoopUnrollPass(*PassRegistry::getPassRegistry()); } @@ -56,7 +62,10 @@ namespace { // explicit -unroll-threshold). static const unsigned OptSizeUnrollThreshold = 50; + unsigned CurrentCount; unsigned CurrentThreshold; + bool CurrentAllowPartial; + bool UserThreshold; // CurrentThreshold is user-specified. bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -87,7 +96,9 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LCSSA) INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false) -Pass *llvm::createLoopUnrollPass() { return new LoopUnroll(); } +Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial) { + return new LoopUnroll(Threshold, Count, AllowPartial); +} /// ApproximateLoopSize - Approximate the size of the loop. static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls) { @@ -119,14 +130,14 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { // from UnrollThreshold, it is overridden to a smaller value if the current // function is marked as optimize-for-size, and the unroll threshold was // not user specified. - CurrentThreshold = UnrollThreshold; - if (Header->getParent()->hasFnAttr(Attribute::OptimizeForSize) && - UnrollThreshold.getNumOccurrences() == 0) - CurrentThreshold = OptSizeUnrollThreshold; + unsigned Threshold = CurrentThreshold; + if (!UserThreshold && + Header->getParent()->hasFnAttr(Attribute::OptimizeForSize)) + Threshold = OptSizeUnrollThreshold; // Find trip count unsigned TripCount = L->getSmallConstantTripCount(); - unsigned Count = UnrollCount; + unsigned Count = CurrentCount; // Automatically select an unroll count. if (Count == 0) { @@ -140,7 +151,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { } // Enforce the threshold. 
- if (CurrentThreshold != NoThreshold) { + if (Threshold != NoThreshold) { unsigned NumInlineCandidates; unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates); DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n"); @@ -149,16 +160,16 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { return false; } uint64_t Size = (uint64_t)LoopSize*Count; - if (TripCount != 1 && Size > CurrentThreshold) { + if (TripCount != 1 && Size > Threshold) { DEBUG(dbgs() << " Too large to fully unroll with count: " << Count - << " because size: " << Size << ">" << CurrentThreshold << "\n"); - if (!UnrollAllowPartial) { + << " because size: " << Size << ">" << Threshold << "\n"); + if (!CurrentAllowPartial) { DEBUG(dbgs() << " will not try to unroll partially because " << "-unroll-allow-partial not given\n"); return false; } // Reduce unroll count to be modulo of TripCount for partial unrolling - Count = CurrentThreshold / LoopSize; + Count = Threshold / LoopSize; while (Count != 0 && TripCount%Count != 0) { Count--; } diff --git a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index bde0e53..a3035cb 100644 --- a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -28,6 +28,7 @@ #include "llvm/Support/IRBuilder.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include <list> using namespace llvm; @@ -299,12 +300,15 @@ void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr, namespace { class MemCpyOpt : public FunctionPass { MemoryDependenceAnalysis *MD; + TargetLibraryInfo *TLI; const TargetData *TD; public: static char ID; // Pass identification, replacement for typeid MemCpyOpt() : FunctionPass(ID) { initializeMemCpyOptPass(*PassRegistry::getPassRegistry()); MD = 0; + TLI = 0; + TD = 0; } bool runOnFunction(Function &F); @@ -316,6 +320,7 @@ namespace { AU.addRequired<DominatorTree>(); AU.addRequired<MemoryDependenceAnalysis>(); AU.addRequired<AliasAnalysis>(); + AU.addRequired<TargetLibraryInfo>(); AU.addPreserved<AliasAnalysis>(); AU.addPreserved<MemoryDependenceAnalysis>(); } @@ -346,6 +351,7 @@ INITIALIZE_PASS_BEGIN(MemCpyOpt, "memcpyopt", "MemCpy Optimization", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTree) INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization", false, false) @@ -688,7 +694,7 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep, if (M->getSource() == MDep->getSource()) return false; - // Second, the length of the memcpy's must be the same, or the preceeding one + // Second, the length of the memcpy's must be the same, or the preceding one // must be larger than the following one. ConstantInt *MDepLen = dyn_cast<ConstantInt>(MDep->getLength()); ConstantInt *MLen = dyn_cast<ConstantInt>(M->getLength()); @@ -804,6 +810,9 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) { bool MemCpyOpt::processMemMove(MemMoveInst *M) { AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); + if (!TLI->has(LibFunc::memmove)) + return false; + // See if the pointers alias. 
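Stepping back to the LoopUnrollPass hunk above (the MemCpyOpt diff resumes below): when the fully-unrolled size exceeds the threshold and partial unrolling is allowed, the count is shrunk to fit and then rounded down until it divides the trip count. A standalone restatement of that selection, wrapped in an invented helper function around the same arithmetic:

    #include <cstdio>

    // Largest count that keeps LoopSize * Count within Threshold, rounded down
    // until it divides the trip count evenly; 0 or 1 means "don't unroll".
    unsigned pickPartialUnrollCount(unsigned Threshold, unsigned LoopSize,
                                    unsigned TripCount) {
      unsigned Count = Threshold / LoopSize;
      while (Count != 0 && TripCount % Count != 0)
        --Count;
      return Count;
    }

    int main() {
      // e.g. a 12-instruction body, trip count 100, threshold 150:
      // 150/12 = 12, rounded down to 10 so that 100 % Count == 0.
      std::printf("%u\n", pickPartialUnrollCount(150, 12, 100));  // prints 10
    }
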
if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(M))) return false; @@ -935,6 +944,14 @@ bool MemCpyOpt::runOnFunction(Function &F) { bool MadeChange = false; MD = &getAnalysis<MemoryDependenceAnalysis>(); TD = getAnalysisIfAvailable<TargetData>(); + TLI = &getAnalysis<TargetLibraryInfo>(); + + // If we don't have at least memset and memcpy, there is little point of doing + // anything here. These are required by a freestanding implementation, so if + // even they are disabled, there is no point in trying hard. + if (!TLI->has(LibFunc::memset) || !TLI->has(LibFunc::memcpy)) + return false; + while (1) { if (!iterateOnFunction(F)) break; diff --git a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp index e093b52..c1dfe15 100644 --- a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp @@ -22,6 +22,7 @@ #define DEBUG_TYPE "reassociate" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" @@ -74,6 +75,8 @@ namespace { class Reassociate : public FunctionPass { DenseMap<BasicBlock*, unsigned> RankMap; DenseMap<AssertingVH<>, unsigned> ValueRankMap; + SmallVector<WeakVH, 8> RedoInsts; + SmallVector<WeakVH, 8> DeadInsts; bool MadeChange; public: static char ID; // Pass identification, replacement for typeid @@ -98,7 +101,7 @@ namespace { void LinearizeExprTree(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops); void LinearizeExpr(BinaryOperator *I); Value *RemoveFactorFromExpression(Value *V, Value *Factor); - void ReassociateBB(BasicBlock *BB); + void ReassociateInst(BasicBlock::iterator &BBI); void RemoveDeadBinaryOp(Value *V); }; @@ -113,13 +116,13 @@ FunctionPass *llvm::createReassociatePass() { return new Reassociate(); } void Reassociate::RemoveDeadBinaryOp(Value *V) { Instruction *Op = dyn_cast<Instruction>(V); - if (!Op || !isa<BinaryOperator>(Op) || !Op->use_empty()) + if (!Op || !isa<BinaryOperator>(Op)) return; Value *LHS = Op->getOperand(0), *RHS = Op->getOperand(1); ValueRankMap.erase(Op); - Op->eraseFromParent(); + DeadInsts.push_back(Op); RemoveDeadBinaryOp(LHS); RemoveDeadBinaryOp(RHS); } @@ -214,6 +217,7 @@ static Instruction *LowerNegateToMultiply(Instruction *Neg, ValueRankMap.erase(Neg); Res->takeName(Neg); Neg->replaceAllUsesWith(Res); + Res->setDebugLoc(Neg->getDebugLoc()); Neg->eraseFromParent(); return Res; } @@ -503,6 +507,7 @@ static Instruction *BreakUpSubtract(Instruction *Sub, // Everyone now refers to the add instruction. ValueRankMap.erase(Sub); Sub->replaceAllUsesWith(New); + New->setDebugLoc(Sub->getDebugLoc()); Sub->eraseFromParent(); DEBUG(dbgs() << "Negated: " << *New << '\n'); @@ -528,6 +533,7 @@ static Instruction *ConvertShiftToMul(Instruction *Shl, ValueRankMap.erase(Shl); Mul->takeName(Shl); Shl->replaceAllUsesWith(Mul); + Mul->setDebugLoc(Shl->getDebugLoc()); Shl->eraseFromParent(); return Mul; } @@ -603,7 +609,7 @@ Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) { // remaining operand. if (Factors.size() == 1) { ValueRankMap.erase(BO); - BO->eraseFromParent(); + DeadInsts.push_back(BO); V = Factors[0].Op; } else { RewriteExprTree(BO, Factors); @@ -732,7 +738,7 @@ Value *Reassociate::OptimizeAdd(Instruction *I, // Now that we have inserted a multiply, optimize it. 
This allows us to // handle cases that require multiple factoring steps, such as this: // (X*2) + (X*2) + (X*2) -> (X*2)*3 -> X*6 - Mul = ReassociateExpression(cast<BinaryOperator>(Mul)); + RedoInsts.push_back(Mul); // If every add operand was a duplicate, return the multiply. if (Ops.empty()) @@ -960,71 +966,69 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I, } -/// ReassociateBB - Inspect all of the instructions in this basic block, -/// reassociating them as we go. -void Reassociate::ReassociateBB(BasicBlock *BB) { - for (BasicBlock::iterator BBI = BB->begin(); BBI != BB->end(); ) { - Instruction *BI = BBI++; - if (BI->getOpcode() == Instruction::Shl && - isa<ConstantInt>(BI->getOperand(1))) - if (Instruction *NI = ConvertShiftToMul(BI, ValueRankMap)) { - MadeChange = true; - BI = NI; - } +/// ReassociateInst - Inspect and reassociate the instruction at the +/// given position, post-incrementing the position. +void Reassociate::ReassociateInst(BasicBlock::iterator &BBI) { + Instruction *BI = BBI++; + if (BI->getOpcode() == Instruction::Shl && + isa<ConstantInt>(BI->getOperand(1))) + if (Instruction *NI = ConvertShiftToMul(BI, ValueRankMap)) { + MadeChange = true; + BI = NI; + } - // Reject cases where it is pointless to do this. - if (!isa<BinaryOperator>(BI) || BI->getType()->isFloatingPointTy() || - BI->getType()->isVectorTy()) - continue; // Floating point ops are not associative. - - // Do not reassociate boolean (i1) expressions. We want to preserve the - // original order of evaluation for short-circuited comparisons that - // SimplifyCFG has folded to AND/OR expressions. If the expression - // is not further optimized, it is likely to be transformed back to a - // short-circuited form for code gen, and the source order may have been - // optimized for the most likely conditions. - if (BI->getType()->isIntegerTy(1)) - continue; + // Reject cases where it is pointless to do this. + if (!isa<BinaryOperator>(BI) || BI->getType()->isFloatingPointTy() || + BI->getType()->isVectorTy()) + return; // Floating point ops are not associative. + + // Do not reassociate boolean (i1) expressions. We want to preserve the + // original order of evaluation for short-circuited comparisons that + // SimplifyCFG has folded to AND/OR expressions. If the expression + // is not further optimized, it is likely to be transformed back to a + // short-circuited form for code gen, and the source order may have been + // optimized for the most likely conditions. + if (BI->getType()->isIntegerTy(1)) + return; - // If this is a subtract instruction which is not already in negate form, - // see if we can convert it to X+-Y. - if (BI->getOpcode() == Instruction::Sub) { - if (ShouldBreakUpSubtract(BI)) { - BI = BreakUpSubtract(BI, ValueRankMap); - // Reset the BBI iterator in case BreakUpSubtract changed the - // instruction it points to. - BBI = BI; - ++BBI; + // If this is a subtract instruction which is not already in negate form, + // see if we can convert it to X+-Y. + if (BI->getOpcode() == Instruction::Sub) { + if (ShouldBreakUpSubtract(BI)) { + BI = BreakUpSubtract(BI, ValueRankMap); + // Reset the BBI iterator in case BreakUpSubtract changed the + // instruction it points to. + BBI = BI; + ++BBI; + MadeChange = true; + } else if (BinaryOperator::isNeg(BI)) { + // Otherwise, this is a negation. See if the operand is a multiply tree + // and if this is not an inner node of a multiply tree. 
+ if (isReassociableOp(BI->getOperand(1), Instruction::Mul) && + (!BI->hasOneUse() || + !isReassociableOp(BI->use_back(), Instruction::Mul))) { + BI = LowerNegateToMultiply(BI, ValueRankMap); MadeChange = true; - } else if (BinaryOperator::isNeg(BI)) { - // Otherwise, this is a negation. See if the operand is a multiply tree - // and if this is not an inner node of a multiply tree. - if (isReassociableOp(BI->getOperand(1), Instruction::Mul) && - (!BI->hasOneUse() || - !isReassociableOp(BI->use_back(), Instruction::Mul))) { - BI = LowerNegateToMultiply(BI, ValueRankMap); - MadeChange = true; - } } } + } - // If this instruction is a commutative binary operator, process it. - if (!BI->isAssociative()) continue; - BinaryOperator *I = cast<BinaryOperator>(BI); + // If this instruction is a commutative binary operator, process it. + if (!BI->isAssociative()) return; + BinaryOperator *I = cast<BinaryOperator>(BI); - // If this is an interior node of a reassociable tree, ignore it until we - // get to the root of the tree, to avoid N^2 analysis. - if (I->hasOneUse() && isReassociableOp(I->use_back(), I->getOpcode())) - continue; + // If this is an interior node of a reassociable tree, ignore it until we + // get to the root of the tree, to avoid N^2 analysis. + if (I->hasOneUse() && isReassociableOp(I->use_back(), I->getOpcode())) + return; - // If this is an add tree that is used by a sub instruction, ignore it - // until we process the subtract. - if (I->hasOneUse() && I->getOpcode() == Instruction::Add && - cast<Instruction>(I->use_back())->getOpcode() == Instruction::Sub) - continue; + // If this is an add tree that is used by a sub instruction, ignore it + // until we process the subtract. + if (I->hasOneUse() && I->getOpcode() == Instruction::Add && + cast<Instruction>(I->use_back())->getOpcode() == Instruction::Sub) + return; - ReassociateExpression(I); - } + ReassociateExpression(I); } Value *Reassociate::ReassociateExpression(BinaryOperator *I) { @@ -1051,6 +1055,8 @@ Value *Reassociate::ReassociateExpression(BinaryOperator *I) { // eliminate it. DEBUG(dbgs() << "Reassoc to scalar: " << *V << '\n'); I->replaceAllUsesWith(V); + if (Instruction *VI = dyn_cast<Instruction>(V)) + VI->setDebugLoc(I->getDebugLoc()); RemoveDeadBinaryOp(I); ++NumAnnihil; return V; @@ -1074,6 +1080,8 @@ Value *Reassociate::ReassociateExpression(BinaryOperator *I) { // This expression tree simplified to something that isn't a tree, // eliminate it. I->replaceAllUsesWith(Ops[0].Op); + if (Instruction *OI = dyn_cast<Instruction>(Ops[0].Op)) + OI->setDebugLoc(I->getDebugLoc()); RemoveDeadBinaryOp(I); return Ops[0].Op; } @@ -1091,7 +1099,21 @@ bool Reassociate::runOnFunction(Function &F) { MadeChange = false; for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) - ReassociateBB(FI); + for (BasicBlock::iterator BBI = FI->begin(); BBI != FI->end(); ) + ReassociateInst(BBI); + + // Now that we're done, revisit any instructions which are likely to + // have secondary reassociation opportunities. + while (!RedoInsts.empty()) + if (Value *V = RedoInsts.pop_back_val()) { + BasicBlock::iterator BBI = cast<Instruction>(V); + ReassociateInst(BBI); + } + + // Now that we're done, delete any instructions which are no longer used. + while (!DeadInsts.empty()) + if (Value *V = DeadInsts.pop_back_val()) + RecursivelyDeleteTriviallyDeadInstructions(V); // We are done with the rank map. 
RankMap.clear(); diff --git a/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp b/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp index 459bb06..47afc77 100644 --- a/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp @@ -9,7 +9,7 @@ // // This file demotes all registers to memory references. It is intented to be // the inverse of PromoteMemoryToRegister. By converting to loads, the only -// values live accross basic blocks are allocas and loads before phi nodes. +// values live across basic blocks are allocas and loads before phi nodes. // It is intended that this should make CFG hacking much easier. // To make later hacking easier, the entry block is split into two, such that // all introduced allocas and nothing else are in the entry block. diff --git a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp index c82e929..db8eb85 100644 --- a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp @@ -1989,7 +1989,7 @@ bool IPSCCP::runOnModule(Module &M) { ReturnsToZap[i]->setOperand(0, UndefValue::get(F->getReturnType())); } - // If we infered constant or undef values for globals variables, we can delete + // If we inferred constant or undef values for globals variables, we can delete // the global and any stores that remain to it. const DenseMap<GlobalVariable*, LatticeVal> &TG = Solver.getTrackedGlobals(); for (DenseMap<GlobalVariable*, LatticeVal>::const_iterator I = TG.begin(), diff --git a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp index bf9ca6d..32a0506 100644 --- a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp @@ -17,6 +17,7 @@ #include "llvm-c/Initialization.h" #include "llvm/InitializePasses.h" #include "llvm/PassManager.h" +#include "llvm/Analysis/Passes.h" #include "llvm/Analysis/Verifier.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Scalar.h" @@ -34,7 +35,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeDCEPass(Registry); initializeDeadInstEliminationPass(Registry); initializeDSEPass(Registry); - initializeGEPSplitterPass(Registry); initializeGVNPass(Registry); initializeEarlyCSEPass(Registry); initializeIndVarSimplifyPass(Registry); @@ -56,7 +56,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeSROA_DTPass(Registry); initializeSROA_SSAUpPass(Registry); initializeCFGSimplifyPassPass(Registry); - initializeSimplifyHalfPowrLibCallsPass(Registry); initializeSimplifyLibCallsPass(Registry); initializeSinkingPass(Registry); initializeTailDupPass(Registry); @@ -103,6 +102,10 @@ void LLVMAddLoopDeletionPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createLoopDeletionPass()); } +void LLVMAddLoopIdiomPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createLoopIdiomPass()); +} + void LLVMAddLoopRotatePass(LLVMPassManagerRef PM) { unwrap(PM)->add(createLoopRotatePass()); } @@ -135,6 +138,10 @@ void LLVMAddScalarReplAggregatesPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createScalarReplAggregatesPass()); } +void LLVMAddScalarReplAggregatesPassSSA(LLVMPassManagerRef PM) { + unwrap(PM)->add(createScalarReplAggregatesPass(-1, false)); +} + void LLVMAddScalarReplAggregatesPassWithThreshold(LLVMPassManagerRef PM, int Threshold) { unwrap(PM)->add(createScalarReplAggregatesPass(Threshold)); @@ -159,3 +166,19 @@ void LLVMAddDemoteMemoryToRegisterPass(LLVMPassManagerRef PM) { void 
LLVMAddVerifierPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createVerifierPass()); } + +void LLVMAddCorrelatedValuePropagationPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createCorrelatedValuePropagationPass()); +} + +void LLVMAddEarlyCSEPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createEarlyCSEPass()); +} + +void LLVMAddTypeBasedAliasAnalysisPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createTypeBasedAliasAnalysisPass()); +} + +void LLVMAddBasicAliasAnalysisPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createBasicAliasAnalysisPass()); +} diff --git a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp index c3ca852..8178c27 100644 --- a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -219,7 +219,7 @@ namespace { /// optimization, which scans the uses of an alloca and determines if it can /// rewrite it in terms of a single new alloca that can be mem2reg'd. class ConvertToScalarInfo { - /// AllocaSize - The size of the alloca being considered. + /// AllocaSize - The size of the alloca being considered in bytes. unsigned AllocaSize; const TargetData &TD; @@ -238,19 +238,22 @@ class ConvertToScalarInfo { /// also declared as a vector, we do want to promote to a vector. bool HadAVector; + /// HadNonMemTransferAccess - True if there is at least one access to the + /// alloca that is not a MemTransferInst. We don't want to turn structs into + /// large integers unless there is some potential for optimization. + bool HadNonMemTransferAccess; + public: explicit ConvertToScalarInfo(unsigned Size, const TargetData &td) - : AllocaSize(Size), TD(td) { - IsNotTrivial = false; - VectorTy = 0; - HadAVector = false; - } + : AllocaSize(Size), TD(td), IsNotTrivial(false), VectorTy(0), + HadAVector(false), HadNonMemTransferAccess(false) { } AllocaInst *TryConvert(AllocaInst *AI); private: bool CanConvertToScalar(Value *V, uint64_t Offset); - void MergeInType(const Type *In, uint64_t Offset); + void MergeInType(const Type *In, uint64_t Offset, bool IsLoadOrStore); + bool MergeInVectorType(const VectorType *VInTy, uint64_t Offset); void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset); Value *ConvertScalar_ExtractValue(Value *NV, const Type *ToType, @@ -282,9 +285,14 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { << *VectorTy << '\n'); NewTy = VectorTy; // Use the vector type. } else { + unsigned BitWidth = AllocaSize * 8; + if (!HadAVector && !HadNonMemTransferAccess && + !TD.fitsInLegalInteger(BitWidth)) + return 0; + DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n"); // Create and insert the integer alloca. - NewTy = IntegerType::get(AI->getContext(), AllocaSize*8); + NewTy = IntegerType::get(AI->getContext(), BitWidth); } AllocaInst *NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin()); ConvertUsesToScalar(AI, NewAI, 0); @@ -294,16 +302,21 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { /// MergeInType - Add the 'In' type to the accumulated vector type (VectorTy) /// so far at the offset specified by Offset (which is specified in bytes). /// -/// There are two cases we handle here: +/// There are three cases we handle here: /// 1) A union of vector types of the same size and potentially its elements. /// Here we turn element accesses into insert/extract element operations. 
/// This promotes a <4 x float> with a store of float to the third element /// into a <4 x float> that uses insert element. -/// 2) A fully general blob of memory, which we turn into some (potentially +/// 2) A union of vector types with power-of-2 size differences, e.g. a float, +/// <2 x float> and <4 x float>. Here we turn element accesses into insert +/// and extract element operations, and <2 x float> accesses into a cast to +/// <2 x double>, an extract, and a cast back to <2 x float>. +/// 3) A fully general blob of memory, which we turn into some (potentially /// large) integer type with extract and insert operations where the loads /// and stores would mutate the memory. We mark this by setting VectorTy /// to VoidTy. -void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) { +void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset, + bool IsLoadOrStore) { // If we already decided to turn this into a blob of integer memory, there is // nothing to be done. if (VectorTy && VectorTy->isVoidTy()) @@ -314,33 +327,33 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) { // If the In type is a vector that is the same size as the alloca, see if it // matches the existing VecTy. if (const VectorType *VInTy = dyn_cast<VectorType>(In)) { - // Remember if we saw a vector type. - HadAVector = true; - - if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) { - // If we're storing/loading a vector of the right size, allow it as a - // vector. If this the first vector we see, remember the type so that - // we know the element size. If this is a subsequent access, ignore it - // even if it is a differing type but the same size. Worst case we can - // bitcast the resultant vectors. - if (VectorTy == 0) - VectorTy = VInTy; + if (MergeInVectorType(VInTy, Offset)) return; - } } else if (In->isFloatTy() || In->isDoubleTy() || (In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 && isPowerOf2_32(In->getPrimitiveSizeInBits()))) { + // Full width accesses can be ignored, because they can always be turned + // into bitcasts. + unsigned EltSize = In->getPrimitiveSizeInBits()/8; + if (IsLoadOrStore && EltSize == AllocaSize) + return; + // If we're accessing something that could be an element of a vector, see // if the implied vector agrees with what we already have and if Offset is // compatible with it. - unsigned EltSize = In->getPrimitiveSizeInBits()/8; - if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 && - (VectorTy == 0 || - cast<VectorType>(VectorTy)->getElementType() - ->getPrimitiveSizeInBits()/8 == EltSize)) { - if (VectorTy == 0) + if (Offset % EltSize == 0 && AllocaSize % EltSize == 0) { + if (!VectorTy) { VectorTy = VectorType::get(In, AllocaSize/EltSize); - return; + return; + } + + unsigned CurrentEltSize = cast<VectorType>(VectorTy)->getElementType() + ->getPrimitiveSizeInBits()/8; + if (EltSize == CurrentEltSize) + return; + + if (In->isIntegerTy() && isPowerOf2_32(AllocaSize / EltSize)) + return; } } @@ -349,6 +362,77 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) { VectorTy = Type::getVoidTy(In->getContext()); } +/// MergeInVectorType - Handles the vector case of MergeInType, returning true +/// if the type was successfully merged and false otherwise. +bool ConvertToScalarInfo::MergeInVectorType(const VectorType *VInTy, + uint64_t Offset) { + // Remember if we saw a vector type. + HadAVector = true; + + // TODO: Support nonzero offsets? 
+ if (Offset != 0) + return false; + + // Only allow vectors that are a power-of-2 away from the size of the alloca. + if (!isPowerOf2_64(AllocaSize / (VInTy->getBitWidth() / 8))) + return false; + + // If this the first vector we see, remember the type so that we know the + // element size. + if (!VectorTy) { + VectorTy = VInTy; + return true; + } + + unsigned BitWidth = cast<VectorType>(VectorTy)->getBitWidth(); + unsigned InBitWidth = VInTy->getBitWidth(); + + // Vectors of the same size can be converted using a simple bitcast. + if (InBitWidth == BitWidth && AllocaSize == (InBitWidth / 8)) + return true; + + const Type *ElementTy = cast<VectorType>(VectorTy)->getElementType(); + const Type *InElementTy = cast<VectorType>(VInTy)->getElementType(); + + // Do not allow mixed integer and floating-point accesses from vectors of + // different sizes. + if (ElementTy->isFloatingPointTy() != InElementTy->isFloatingPointTy()) + return false; + + if (ElementTy->isFloatingPointTy()) { + // Only allow floating-point vectors of different sizes if they have the + // same element type. + // TODO: This could be loosened a bit, but would anything benefit? + if (ElementTy != InElementTy) + return false; + + // There are no arbitrary-precision floating-point types, which limits the + // number of legal vector types with larger element types that we can form + // to bitcast and extract a subvector. + // TODO: We could support some more cases with mixed fp128 and double here. + if (!(BitWidth == 64 || BitWidth == 128) || + !(InBitWidth == 64 || InBitWidth == 128)) + return false; + } else { + assert(ElementTy->isIntegerTy() && "Vector elements must be either integer " + "or floating-point."); + unsigned BitWidth = ElementTy->getPrimitiveSizeInBits(); + unsigned InBitWidth = InElementTy->getPrimitiveSizeInBits(); + + // Do not allow integer types smaller than a byte or types whose widths are + // not a multiple of a byte. + if (BitWidth < 8 || InBitWidth < 8 || + BitWidth % 8 != 0 || InBitWidth % 8 != 0) + return false; + } + + // Pick the largest of the two vector types. + if (InBitWidth > BitWidth) + VectorTy = VInTy; + + return true; +} + /// CanConvertToScalar - V is a pointer. If we can convert the pointee and all /// its accesses to a single vector type, return true and set VecTy to /// the new type. If we could convert the alloca into a single promotable @@ -369,7 +453,8 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { // Don't touch MMX operations. if (LI->getType()->isX86_MMXTy()) return false; - MergeInType(LI->getType(), Offset); + HadNonMemTransferAccess = true; + MergeInType(LI->getType(), Offset, true); continue; } @@ -379,7 +464,8 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { // Don't touch MMX operations. if (SI->getOperand(0)->getType()->isX86_MMXTy()) return false; - MergeInType(SI->getOperand(0)->getType(), Offset); + HadNonMemTransferAccess = true; + MergeInType(SI->getOperand(0)->getType(), Offset, true); continue; } @@ -403,6 +489,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { if (!CanConvertToScalar(GEP, Offset+GEPOffset)) return false; IsNotTrivial = true; // Can't be mem2reg'd. + HadNonMemTransferAccess = true; continue; } @@ -414,6 +501,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { !isa<ConstantInt>(MSI->getLength())) return false; IsNotTrivial = true; // Can't be mem2reg'd. 
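As an illustrative aside (not part of the patch): the new MergeInVectorType only unions vector accesses whose size is a power-of-2 fraction of the alloca, keeping the widest vector type seen so far. The standalone sketch below models just those size checks; canMergeVectorAccess, AllocaBytes and AccessBits are hypothetical names, and the explicit divisibility test is added here for clarity rather than copied from the real helper.

// Sketch only -- approximates the size checks in MergeInVectorType, not the LLVM API.
#include <cstdint>
#include <cstdio>

static bool isPowerOf2(uint64_t V) { return V && (V & (V - 1)) == 0; }

// AllocaBytes: size of the alloca; AccessBits: bit width of the vector access.
static bool canMergeVectorAccess(uint64_t AllocaBytes, uint64_t AccessBits,
                                 uint64_t OffsetBytes) {
  if (OffsetBytes != 0)                 // the real code has a TODO for nonzero offsets
    return false;
  uint64_t AccessBytes = AccessBits / 8;
  if (AccessBytes == 0 || AllocaBytes % AccessBytes != 0)
    return false;                       // explicit divisibility check, for clarity
  return isPowerOf2(AllocaBytes / AccessBytes);
}

int main() {
  std::printf("%d\n", (int)canMergeVectorAccess(16, 128, 0)); // 1: <4 x float>, same size as the alloca
  std::printf("%d\n", (int)canMergeVectorAccess(16, 64, 0));  // 1: <2 x float>, half the alloca
  std::printf("%d\n", (int)canMergeVectorAccess(16, 96, 0));  // 0: 12 bytes is not a power-of-2 fraction
  return 0;
}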
+ HadNonMemTransferAccess = true; continue; } @@ -575,6 +663,63 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, } } +/// getScaledElementType - Gets a scaled element type for a partial vector +/// access of an alloca. The input types must be integer or floating-point +/// scalar or vector types, and the resulting type is an integer, float or +/// double. +static const Type *getScaledElementType(const Type *Ty1, const Type *Ty2, + unsigned NewBitWidth) { + bool IsFP1 = Ty1->isFloatingPointTy() || + (Ty1->isVectorTy() && + cast<VectorType>(Ty1)->getElementType()->isFloatingPointTy()); + bool IsFP2 = Ty2->isFloatingPointTy() || + (Ty2->isVectorTy() && + cast<VectorType>(Ty2)->getElementType()->isFloatingPointTy()); + + LLVMContext &Context = Ty1->getContext(); + + // Prefer floating-point types over integer types, as integer types may have + // been created by earlier scalar replacement. + if (IsFP1 || IsFP2) { + if (NewBitWidth == 32) + return Type::getFloatTy(Context); + if (NewBitWidth == 64) + return Type::getDoubleTy(Context); + } + + return Type::getIntNTy(Context, NewBitWidth); +} + +/// CreateShuffleVectorCast - Creates a shuffle vector to convert one vector +/// to another vector of the same element type which has the same allocation +/// size but different primitive sizes (e.g. <3 x i32> and <4 x i32>). +static Value *CreateShuffleVectorCast(Value *FromVal, const Type *ToType, + IRBuilder<> &Builder) { + const Type *FromType = FromVal->getType(); + const VectorType *FromVTy = cast<VectorType>(FromType); + const VectorType *ToVTy = cast<VectorType>(ToType); + assert((ToVTy->getElementType() == FromVTy->getElementType()) && + "Vectors must have the same element type"); + Value *UnV = UndefValue::get(FromType); + unsigned numEltsFrom = FromVTy->getNumElements(); + unsigned numEltsTo = ToVTy->getNumElements(); + + SmallVector<Constant*, 3> Args; + const Type* Int32Ty = Builder.getInt32Ty(); + unsigned minNumElts = std::min(numEltsFrom, numEltsTo); + unsigned i; + for (i=0; i != minNumElts; ++i) + Args.push_back(ConstantInt::get(Int32Ty, i)); + + if (i < numEltsTo) { + Constant* UnC = UndefValue::get(Int32Ty); + for (; i != numEltsTo; ++i) + Args.push_back(UnC); + } + Constant *Mask = ConstantVector::get(Args); + return Builder.CreateShuffleVector(FromVal, UnV, Mask, "tmpV"); +} + /// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer /// or vector value FromVal, extracting the bits from the offset specified by /// Offset. This returns the value, which is of type ToType. @@ -589,14 +734,46 @@ Value *ConvertToScalarInfo:: ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, uint64_t Offset, IRBuilder<> &Builder) { // If the load is of the whole new alloca, no conversion is needed. - if (FromVal->getType() == ToType && Offset == 0) + const Type *FromType = FromVal->getType(); + if (FromType == ToType && Offset == 0) return FromVal; // If the result alloca is a vector type, this is either an element // access or a bitcast to another vector type of the same size. - if (const VectorType *VTy = dyn_cast<VectorType>(FromVal->getType())) { - if (ToType->isVectorTy()) - return Builder.CreateBitCast(FromVal, ToType, "tmp"); + if (const VectorType *VTy = dyn_cast<VectorType>(FromType)) { + unsigned ToTypeSize = TD.getTypeAllocSize(ToType); + if (ToTypeSize == AllocaSize) { + // If the two types have the same primitive size, use a bit cast. 
+ // Otherwise, it is two vectors with the same element type that has + // the same allocation size but different number of elements so use + // a shuffle vector. + if (FromType->getPrimitiveSizeInBits() == + ToType->getPrimitiveSizeInBits()) + return Builder.CreateBitCast(FromVal, ToType, "tmp"); + else + return CreateShuffleVectorCast(FromVal, ToType, Builder); + } + + if (isPowerOf2_64(AllocaSize / ToTypeSize)) { + assert(!(ToType->isVectorTy() && Offset != 0) && "Can't extract a value " + "of a smaller vector type at a nonzero offset."); + + const Type *CastElementTy = getScaledElementType(FromType, ToType, + ToTypeSize * 8); + unsigned NumCastVectorElements = AllocaSize / ToTypeSize; + + LLVMContext &Context = FromVal->getContext(); + const Type *CastTy = VectorType::get(CastElementTy, + NumCastVectorElements); + Value *Cast = Builder.CreateBitCast(FromVal, CastTy, "tmp"); + + unsigned EltSize = TD.getTypeAllocSizeInBits(CastElementTy); + unsigned Elt = Offset/EltSize; + assert(EltSize*Elt == Offset && "Invalid modulus in validity checking"); + Value *Extract = Builder.CreateExtractElement(Cast, ConstantInt::get( + Type::getInt32Ty(Context), Elt), "tmp"); + return Builder.CreateBitCast(Extract, ToType, "tmp"); + } // Otherwise it must be an element access. unsigned Elt = 0; @@ -714,21 +891,49 @@ ConvertScalar_InsertValue(Value *SV, Value *Old, // Changing the whole vector with memset or with an access of a different // vector type? - if (ValSize == VecSize) - return Builder.CreateBitCast(SV, AllocaType, "tmp"); + if (ValSize == VecSize) { + // If the two types have the same primitive size, use a bit cast. + // Otherwise, it is two vectors with the same element type that has + // the same allocation size but different number of elements so use + // a shuffle vector. + if (VTy->getPrimitiveSizeInBits() == + SV->getType()->getPrimitiveSizeInBits()) + return Builder.CreateBitCast(SV, AllocaType, "tmp"); + else + return CreateShuffleVectorCast(SV, VTy, Builder); + } - uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType()); + if (isPowerOf2_64(VecSize / ValSize)) { + assert(!(SV->getType()->isVectorTy() && Offset != 0) && "Can't insert a " + "value of a smaller vector type at a nonzero offset."); - // Must be an element insertion. - unsigned Elt = Offset/EltSize; + const Type *CastElementTy = getScaledElementType(VTy, SV->getType(), + ValSize); + unsigned NumCastVectorElements = VecSize / ValSize; - if (SV->getType() != VTy->getElementType()) - SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp"); + LLVMContext &Context = SV->getContext(); + const Type *OldCastTy = VectorType::get(CastElementTy, + NumCastVectorElements); + Value *OldCast = Builder.CreateBitCast(Old, OldCastTy, "tmp"); - SV = Builder.CreateInsertElement(Old, SV, + Value *SVCast = Builder.CreateBitCast(SV, CastElementTy, "tmp"); + + unsigned EltSize = TD.getTypeAllocSizeInBits(CastElementTy); + unsigned Elt = Offset/EltSize; + assert(EltSize*Elt == Offset && "Invalid modulus in validity checking"); + Value *Insert = + Builder.CreateInsertElement(OldCast, SVCast, ConstantInt::get( + Type::getInt32Ty(Context), Elt), "tmp"); + return Builder.CreateBitCast(Insert, AllocaType, "tmp"); + } + + // Must be an element insertion. 
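Another editorial sketch, not LLVM code: for a partial access that is a power-of-2 fraction of a vector alloca, the new extract/insert paths bitcast the alloca value to a vector whose element width equals the access width (getScaledElementType prefers float or double when floating point is involved) and then touch a single element at index Offset/EltSize. The snippet below reproduces only that arithmetic for the <2 x float> case described in the MergeInType comment; CastPlan and planPartialAccess are made-up names.

// Sketch only -- computes the cast vector shape and element index, not the real transform.
#include <cassert>
#include <cstdio>

struct CastPlan {
  unsigned ElemBits;   // width of the scaled element type
  unsigned NumElems;   // elements in the cast vector
  unsigned ElemIndex;  // which element to extract/insert
  bool     UseFP;      // prefer float/double when FP is involved
};

static CastPlan planPartialAccess(unsigned AllocaBytes, unsigned AccessBytes,
                                  unsigned OffsetBits, bool AnyFP) {
  CastPlan P;
  P.ElemBits  = AccessBytes * 8;              // one cast-vector element per access-sized chunk
  P.NumElems  = AllocaBytes / AccessBytes;
  P.UseFP     = AnyFP && (P.ElemBits == 32 || P.ElemBits == 64);
  assert(OffsetBits % P.ElemBits == 0 && "offset not aligned to an element");
  P.ElemIndex = OffsetBits / P.ElemBits;
  return P;
}

int main() {
  // A <2 x float> load from a 16-byte <4 x float> alloca at offset 0:
  // bitcast to <2 x double>, extract element 0, bitcast back to <2 x float>.
  CastPlan P = planPartialAccess(16, 8, 0, /*AnyFP=*/true);
  const char *Elem = P.UseFP ? (P.ElemBits == 64 ? "double" : "float") : "iN";
  std::printf("cast to <%u x %s>, element %u\n", P.NumElems, Elem, P.ElemIndex);
  return 0;
}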
+ assert(SV->getType() == VTy->getElementType()); + uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType()); + unsigned Elt = Offset/EltSize; + return Builder.CreateInsertElement(Old, SV, ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt), "tmp"); - return SV; } // If SV is a first-class aggregate value, insert each value recursively. @@ -1083,7 +1288,8 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) { } const Type *LoadTy = cast<PointerType>(PN->getType())->getElementType(); - PHINode *NewPN = PHINode::Create(LoadTy, PN->getName()+".ld", PN); + PHINode *NewPN = PHINode::Create(LoadTy, PN->getNumIncomingValues(), + PN->getName()+".ld", PN); // Get the TBAA tag and alignment to use from one of the loads. It doesn't // matter which one we get and if any differ, it doesn't matter. diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp index ce5dd73..1137c2b 100644 --- a/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -73,7 +73,8 @@ static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) { if (UseLLVMTrap) { Function *TrapFn = Intrinsic::getDeclaration(BB->getParent()->getParent(), Intrinsic::trap); - CallInst::Create(TrapFn, "", I); + CallInst *CallTrap = CallInst::Create(TrapFn, "", I); + CallTrap->setDebugLoc(I->getDebugLoc()); } new UnreachableInst(I->getContext(), I); @@ -259,11 +260,12 @@ static bool MergeEmptyReturnBlocks(Function &F) { PHINode *RetBlockPHI = dyn_cast<PHINode>(RetBlock->begin()); if (RetBlockPHI == 0) { Value *InVal = cast<ReturnInst>(RetBlock->getTerminator())->getOperand(0); - RetBlockPHI = PHINode::Create(Ret->getOperand(0)->getType(), "merge", + pred_iterator PB = pred_begin(RetBlock), PE = pred_end(RetBlock); + RetBlockPHI = PHINode::Create(Ret->getOperand(0)->getType(), + std::distance(PB, PE), "merge", &RetBlock->front()); - for (pred_iterator PI = pred_begin(RetBlock), E = pred_end(RetBlock); - PI != E; ++PI) + for (pred_iterator PI = PB; PI != PE; ++PI) RetBlockPHI->addIncoming(InVal, *PI); RetBlock->getTerminator()->setOperand(0, RetBlockPHI); } diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp deleted file mode 100644 index 70ff32e..0000000 --- a/contrib/llvm/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp +++ /dev/null @@ -1,160 +0,0 @@ -//===- SimplifyHalfPowrLibCalls.cpp - Optimize specific half_powr calls ---===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements a simple pass that applies an experimental -// transformation on calls to specific functions. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "simplify-libcalls-halfpowr" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" -#include "llvm/Module.h" -#include "llvm/Pass.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Target/TargetData.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Support/Debug.h" -using namespace llvm; - -namespace { - /// This pass optimizes well half_powr function calls. 
- /// - class SimplifyHalfPowrLibCalls : public FunctionPass { - const TargetData *TD; - public: - static char ID; // Pass identification - SimplifyHalfPowrLibCalls() : FunctionPass(ID) { - initializeSimplifyHalfPowrLibCallsPass(*PassRegistry::getPassRegistry()); - } - - bool runOnFunction(Function &F); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - } - - Instruction * - InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs, - Instruction *InsertPt); - }; - char SimplifyHalfPowrLibCalls::ID = 0; -} // end anonymous namespace. - -INITIALIZE_PASS(SimplifyHalfPowrLibCalls, "simplify-libcalls-halfpowr", - "Simplify half_powr library calls", false, false) - -// Public interface to the Simplify HalfPowr LibCalls pass. -FunctionPass *llvm::createSimplifyHalfPowrLibCallsPass() { - return new SimplifyHalfPowrLibCalls(); -} - -/// InlineHalfPowrs - Inline a sequence of adjacent half_powr calls, rearranging -/// their control flow to better facilitate subsequent optimization. -Instruction * -SimplifyHalfPowrLibCalls:: -InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs, - Instruction *InsertPt) { - std::vector<BasicBlock *> Bodies; - BasicBlock *NewBlock = 0; - - for (unsigned i = 0, e = HalfPowrs.size(); i != e; ++i) { - CallInst *Call = cast<CallInst>(HalfPowrs[i]); - Function *Callee = Call->getCalledFunction(); - - // Minimally sanity-check the CFG of half_powr to ensure that it contains - // the kind of code we expect. If we're running this pass, we have - // reason to believe it will be what we expect. - Function::iterator I = Callee->begin(); - BasicBlock *Prologue = I++; - if (I == Callee->end()) break; - BasicBlock *SubnormalHandling = I++; - if (I == Callee->end()) break; - BasicBlock *Body = I++; - if (I != Callee->end()) break; - if (SubnormalHandling->getSinglePredecessor() != Prologue) - break; - BranchInst *PBI = dyn_cast<BranchInst>(Prologue->getTerminator()); - if (!PBI || !PBI->isConditional()) - break; - BranchInst *SNBI = dyn_cast<BranchInst>(SubnormalHandling->getTerminator()); - if (!SNBI || SNBI->isConditional()) - break; - if (!isa<ReturnInst>(Body->getTerminator())) - break; - - Instruction *NextInst = llvm::next(BasicBlock::iterator(Call)); - - // Inline the call, taking care of what code ends up where. - NewBlock = SplitBlock(NextInst->getParent(), NextInst, this); - - InlineFunctionInfo IFI(0, TD); - bool B = InlineFunction(Call, IFI); - assert(B && "half_powr didn't inline?"); - (void)B; - - BasicBlock *NewBody = NewBlock->getSinglePredecessor(); - assert(NewBody); - Bodies.push_back(NewBody); - } - - if (!NewBlock) - return InsertPt; - - // Put the code for all the bodies into one block, to facilitate - // subsequent optimization. - (void)SplitEdge(NewBlock->getSinglePredecessor(), NewBlock, this); - for (unsigned i = 0, e = Bodies.size(); i != e; ++i) { - BasicBlock *Body = Bodies[i]; - Instruction *FNP = Body->getFirstNonPHI(); - // Splice the insts from body into NewBlock. - NewBlock->getInstList().splice(NewBlock->begin(), Body->getInstList(), - FNP, Body->getTerminator()); - } - - return NewBlock->begin(); -} - -/// runOnFunction - Top level algorithm. -/// -bool SimplifyHalfPowrLibCalls::runOnFunction(Function &F) { - TD = getAnalysisIfAvailable<TargetData>(); - - bool Changed = false; - std::vector<Instruction *> HalfPowrs; - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - // Look for calls. 
- bool IsHalfPowr = false; - if (CallInst *CI = dyn_cast<CallInst>(I)) { - // Look for direct calls and calls to non-external functions. - Function *Callee = CI->getCalledFunction(); - if (Callee && Callee->hasExternalLinkage()) { - // Look for calls with well-known names. - if (Callee->getName() == "__half_powrf4") - IsHalfPowr = true; - } - } - if (IsHalfPowr) - HalfPowrs.push_back(I); - // We're looking for sequences of up to three such calls, which we'll - // simplify as a group. - if ((!IsHalfPowr && !HalfPowrs.empty()) || HalfPowrs.size() == 3) { - I = InlineHalfPowrs(HalfPowrs, I); - E = I->getParent()->end(); - HalfPowrs.clear(); - Changed = true; - } - } - assert(HalfPowrs.empty() && "Block had no terminator!"); - } - - return Changed; -} diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp index 9f136d4..6247b03 100644 --- a/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -49,6 +49,7 @@ class LibCallOptimization { protected: Function *Caller; const TargetData *TD; + const TargetLibraryInfo *TLI; LLVMContext* Context; public: LibCallOptimization() { } @@ -62,9 +63,11 @@ public: virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) =0; - Value *OptimizeCall(CallInst *CI, const TargetData *TD, IRBuilder<> &B) { + Value *OptimizeCall(CallInst *CI, const TargetData *TD, + const TargetLibraryInfo *TLI, IRBuilder<> &B) { Caller = CI->getParent()->getParent(); this->TD = TD; + this->TLI = TLI; if (CI->getCalledFunction()) Context = &CI->getCalledFunction()->getContext(); @@ -97,6 +100,15 @@ static bool IsOnlyUsedInZeroEqualityComparison(Value *V) { } return true; } + +static bool CallHasFloatingPointArgument(const CallInst *CI) { + for (CallInst::const_op_iterator it = CI->op_begin(), e = CI->op_end(); + it != e; ++it) { + if ((*it)->getType()->isFloatingPointTy()) + return true; + } + return false; +} /// IsOnlyUsedInEqualityComparison - Return true if it is only used in equality /// comparisons with With. @@ -1075,14 +1087,8 @@ struct ToAsciiOpt : public LibCallOptimization { // 'printf' Optimizations struct PrintFOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // Require one fixed pointer argument and an integer/void result. - const FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() || - !(FT->getReturnType()->isIntegerTy() || - FT->getReturnType()->isVoidTy())) - return 0; - + Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI, + IRBuilder<> &B) { // Check for a fixed format string. std::string FormatStr; if (!GetConstantStringInfo(CI->getArgOperand(0), FormatStr)) @@ -1138,20 +1144,40 @@ struct PrintFOpt : public LibCallOptimization { } return 0; } + + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Require one fixed pointer argument and an integer/void result. + const FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() || + !(FT->getReturnType()->isIntegerTy() || + FT->getReturnType()->isVoidTy())) + return 0; + + if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) { + return V; + } + + // printf(format, ...) -> iprintf(format, ...) if no floating point + // arguments. 
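A brief aside on the new fallback in the printf-family optimizers: when the fixed-format-string rewrites do not fire, printf, sprintf and fprintf calls are redirected to the integer-only iprintf, siprintf and fiprintf variants, provided the target library declares them (the TLI->has(...) checks) and no call argument is floating point, which lets newlib-style C libraries avoid pulling in floating-point formatting code. The sketch below models only that decision; ArgKind, chooseCallee and TargetHasIPrintf are illustrative stand-ins, not LLVM API.

// Sketch only -- the rewrite-to-iprintf decision, with made-up types.
#include <string>
#include <vector>

enum class ArgKind { Int, Pointer, Float, Double };

static bool hasFloatingPointArgument(const std::vector<ArgKind> &Args) {
  for (ArgKind K : Args)
    if (K == ArgKind::Float || K == ArgKind::Double)
      return true;
  return false;
}

// TargetHasIPrintf stands in for TLI->has(LibFunc::iprintf) in the real pass.
static std::string chooseCallee(const std::vector<ArgKind> &Args,
                                bool TargetHasIPrintf) {
  if (TargetHasIPrintf && !hasFloatingPointArgument(Args))
    return "iprintf";
  return "printf";
}

int main() {
  std::vector<ArgKind> IntArgs = {ArgKind::Pointer, ArgKind::Int};
  std::vector<ArgKind> FPArgs  = {ArgKind::Pointer, ArgKind::Double};
  return (chooseCallee(IntArgs, true) == "iprintf" &&
          chooseCallee(FPArgs, true) == "printf") ? 0 : 1;
}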
+ if (TLI->has(LibFunc::iprintf) && !CallHasFloatingPointArgument(CI)) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + Constant *IPrintFFn = + M->getOrInsertFunction("iprintf", FT, Callee->getAttributes()); + CallInst *New = cast<CallInst>(CI->clone()); + New->setCalledFunction(IPrintFFn); + B.Insert(New); + return New; + } + return 0; + } }; //===---------------------------------------===// // 'sprintf' Optimizations struct SPrintFOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // Require two fixed pointer arguments and an integer result. - const FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isPointerTy() || - !FT->getReturnType()->isIntegerTy()) - return 0; - + Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI, + IRBuilder<> &B) { // Check for a fixed format string. std::string FormatStr; if (!GetConstantStringInfo(CI->getArgOperand(1), FormatStr)) @@ -1212,6 +1238,32 @@ struct SPrintFOpt : public LibCallOptimization { } return 0; } + + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Require two fixed pointer arguments and an integer result. + const FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + !FT->getReturnType()->isIntegerTy()) + return 0; + + if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) { + return V; + } + + // sprintf(str, format, ...) -> siprintf(str, format, ...) if no floating + // point arguments. + if (TLI->has(LibFunc::siprintf) && !CallHasFloatingPointArgument(CI)) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + Constant *SIPrintFFn = + M->getOrInsertFunction("siprintf", FT, Callee->getAttributes()); + CallInst *New = cast<CallInst>(CI->clone()); + New->setCalledFunction(SIPrintFFn); + B.Insert(New); + return New; + } + return 0; + } }; //===---------------------------------------===// @@ -1278,14 +1330,8 @@ struct FPutsOpt : public LibCallOptimization { // 'fprintf' Optimizations struct FPrintFOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // Require two fixed paramters as pointers and integer result. - const FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isPointerTy() || - !FT->getReturnType()->isIntegerTy()) - return 0; - + Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI, + IRBuilder<> &B) { // All the optimizations depend on the format string. std::string FormatStr; if (!GetConstantStringInfo(CI->getArgOperand(1), FormatStr)) @@ -1330,6 +1376,32 @@ struct FPrintFOpt : public LibCallOptimization { } return 0; } + + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Require two fixed paramters as pointers and integer result. + const FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + !FT->getReturnType()->isIntegerTy()) + return 0; + + if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) { + return V; + } + + // fprintf(stream, format, ...) -> fiprintf(stream, format, ...) if no + // floating point arguments. 
+ if (TLI->has(LibFunc::fiprintf) && !CallHasFloatingPointArgument(CI)) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + Constant *FIPrintFFn = + M->getOrInsertFunction("fiprintf", FT, Callee->getAttributes()); + CallInst *New = cast<CallInst>(CI->clone()); + New->setCalledFunction(FIPrintFFn); + B.Insert(New); + return New; + } + return 0; + } }; //===---------------------------------------===// @@ -1544,8 +1616,11 @@ bool SimplifyLibCalls::runOnFunction(Function &F) { // Set the builder to the instruction after the call. Builder.SetInsertPoint(BB, I); + // Use debug location of CI for all new instructions. + Builder.SetCurrentDebugLocation(CI->getDebugLoc()); + // Try to optimize this call. - Value *Result = LCO->OptimizeCall(CI, TD, Builder); + Value *Result = LCO->OptimizeCall(CI, TD, TLI, Builder); if (Result == 0) continue; DEBUG(dbgs() << "SimplifyLibCalls simplified: " << *CI; diff --git a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp index 5b6bc04..539cc6f 100644 --- a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -36,7 +36,7 @@ // evaluated each time through the tail recursion. Safely keeping allocas // in the entry block requires analysis to proves that the tail-called // function does not read or write the stack object. -// 2. Tail recursion is only performed if the call immediately preceeds the +// 2. Tail recursion is only performed if the call immediately precedes the // return instruction. It's possible that there could be a jump between // the call and the return. // 3. There can be intervening operations between the call and the return that @@ -433,7 +433,7 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, if (CanMoveAboveCall(BBI, CI)) continue; // If we can't move the instruction above the call, it might be because it - // is an associative and commutative operation that could be tranformed + // is an associative and commutative operation that could be transformed // using accumulator recursion elimination. Check to see if this is the // case, and if so, remember the initial accumulator value for later. if ((AccumulatorRecursionEliminationInitVal = @@ -496,7 +496,7 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, Instruction *InsertPos = OldEntry->begin(); for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) { - PHINode *PN = PHINode::Create(I->getType(), + PHINode *PN = PHINode::Create(I->getType(), 2, I->getName() + ".tr", InsertPos); I->replaceAllUsesWith(PN); // Everyone use the PHI node now! PN->addIncoming(I, NewEntry); @@ -527,8 +527,10 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, if (AccumulatorRecursionEliminationInitVal) { Instruction *AccRecInstr = AccumulatorRecursionInstr; // Start by inserting a new PHI node for the accumulator. + pred_iterator PB = pred_begin(OldEntry), PE = pred_end(OldEntry); PHINode *AccPN = PHINode::Create(AccumulatorRecursionEliminationInitVal->getType(), + std::distance(PB, PE) + 1, "accumulator.tr", OldEntry->begin()); // Loop over all of the predecessors of the tail recursion block. For the @@ -537,8 +539,7 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, // other tail recursions eliminated) the accumulator is not modified. 
// Because we haven't added the branch in the current block to OldEntry yet, // it will not show up as a predecessor. - for (pred_iterator PI = pred_begin(OldEntry), PE = pred_end(OldEntry); - PI != PE; ++PI) { + for (pred_iterator PI = PB; PI != PE; ++PI) { BasicBlock *P = *PI; if (P == &F->getEntryBlock()) AccPN->addIncoming(AccumulatorRecursionEliminationInitVal, P); @@ -572,7 +573,9 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, // Now that all of the PHI nodes are in place, remove the call and // ret instructions, replacing them with an unconditional branch. - BranchInst::Create(OldEntry, Ret); + BranchInst *NewBI = BranchInst::Create(OldEntry, Ret); + NewBI->setDebugLoc(CI->getDebugLoc()); + BB->getInstList().erase(Ret); // Remove return. BB->getInstList().erase(CI); // Remove call. ++NumEliminated;
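One last editorial note: several hunks above (the load-PHI rewrite in tryToMakeAllocaBePromotable, MergeEmptyReturnBlocks, and the two tail-recursion PHIs) now pass an expected incoming-value count to PHINode::Create, taken from getNumIncomingValues() or computed once with std::distance over the predecessor range. The point is to reserve operand space up front so that adding incoming edges does not repeatedly reallocate. The sketch below shows the same reserve-then-append pattern with a plain std::vector; buildPhi and Incoming are hypothetical names, not LLVM classes.

// Sketch only -- the reserve-then-append pattern behind the new
// PHINode::Create(Ty, NumReservedValues, ...) calls in this patch.
#include <iterator>
#include <list>
#include <vector>

struct Incoming { int Value; int PredBlock; };

// Preds stands in for the pred_begin()/pred_end() range of a basic block.
static std::vector<Incoming> buildPhi(const std::list<int> &Preds, int InVal) {
  std::vector<Incoming> Phi;
  // Count the predecessors once, as the patch does with
  // std::distance(pred_begin(BB), pred_end(BB)), and reserve space so that
  // appending incoming values never reallocates.
  Phi.reserve(std::distance(Preds.begin(), Preds.end()));
  for (int P : Preds)
    Phi.push_back({InVal, P});
  return Phi;
}

int main() {
  std::list<int> Preds = {1, 2, 3};
  return buildPhi(Preds, 42).size() == 3 ? 0 : 1;
}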