Diffstat (limited to 'lib')
465 files changed, 17513 insertions, 14723 deletions
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 4ae8859..808e6fa 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -80,7 +80,7 @@ static Constant *FoldBitCast(Constant *C, const Type *DestTy,
   // First thing is first. We only want to think about integer here, so if
   // we have something in FP form, recast it as integer.
-  if (DstEltTy->isFloatingPoint()) {
+  if (DstEltTy->isFloatingPointTy()) {
     // Fold to a vector of integers with same size as our FP type.
     unsigned FPWidth = DstEltTy->getPrimitiveSizeInBits();
     const Type *DestIVTy =
@@ -95,7 +95,7 @@ static Constant *FoldBitCast(Constant *C, const Type *DestTy,
   // Okay, we know the destination is integer, if the input is FP, convert
   // it to integer first.
-  if (SrcEltTy->isFloatingPoint()) {
+  if (SrcEltTy->isFloatingPointTy()) {
     unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
     const Type *SrcIVTy =
       VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElt);
@@ -517,6 +517,42 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
   return 0;
 }

+/// CastGEPIndices - If array indices are not pointer-sized integers,
+/// explicitly cast them so that they aren't implicitly casted by the
+/// getelementptr.
+static Constant *CastGEPIndices(Constant *const *Ops, unsigned NumOps,
+                                const Type *ResultTy,
+                                const TargetData *TD) {
+  if (!TD) return 0;
+  const Type *IntPtrTy = TD->getIntPtrType(ResultTy->getContext());
+
+  bool Any = false;
+  SmallVector<Constant*, 32> NewIdxs;
+  for (unsigned i = 1; i != NumOps; ++i) {
+    if ((i == 1 ||
+         !isa<StructType>(GetElementPtrInst::getIndexedType(Ops[0]->getType(),
+                                        reinterpret_cast<Value *const *>(Ops+1),
+                                                            i-1))) &&
+        Ops[i]->getType() != IntPtrTy) {
+      Any = true;
+      NewIdxs.push_back(ConstantExpr::getCast(CastInst::getCastOpcode(Ops[i],
+                                                                      true,
+                                                                      IntPtrTy,
+                                                                      true),
+                                              Ops[i], IntPtrTy));
+    } else
+      NewIdxs.push_back(Ops[i]);
+  }
+  if (!Any) return 0;
+
+  Constant *C =
+    ConstantExpr::getGetElementPtr(Ops[0], &NewIdxs[0], NewIdxs.size());
+  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+    if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+      C = Folded;
+  return C;
+}
+
 /// SymbolicallyEvaluateGEP - If we can symbolically evaluate the specified GEP
 /// constant expression, do so.
 static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps,
@@ -676,10 +712,10 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) {
 /// ConstantFoldConstantExpression - Attempt to fold the constant expression
 /// using the specified TargetData. If successful, the constant result is
 /// returned, if not, null is returned.
-Constant *llvm::ConstantFoldConstantExpression(ConstantExpr *CE,
+Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
                                                const TargetData *TD) {
   SmallVector<Constant*, 8> Ops;
-  for (User::op_iterator i = CE->op_begin(), e = CE->op_end(); i != e; ++i) {
+  for (User::const_op_iterator i = CE->op_begin(), e = CE->op_end(); i != e; ++i) {
     Constant *NewC = cast<Constant>(*i);
     // Recursively fold the ConstantExpr's operands.
     if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(NewC))
@@ -810,6 +846,8 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
   case Instruction::ShuffleVector:
     return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]);
   case Instruction::GetElementPtr:
+    if (Constant *C = CastGEPIndices(Ops, NumOps, DestTy, TD))
+      return C;
     if (Constant *C = SymbolicallyEvaluateGEP(Ops, NumOps, DestTy, TD))
       return C;
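The new CastGEPIndices step is what makes constant GEPs with mixed-width indices fold consistently: every array index is promoted to the target's pointer-sized integer before the fold is retried. A minimal sketch of driving this through the public entry point, assuming this era's C++ API and hypothetical names Ctx (an LLVMContext) and G (a global's Constant*); this is illustrative, not code from this patch:

    // On a target with 64-bit pointers, an i32-indexed constant GEP has its
    // indices promoted to i64 before folding.
    TargetData TD("e-p:64:64:64");
    const Type *I32 = Type::getInt32Ty(Ctx);
    Constant *Idxs[] = { ConstantInt::get(I32, 0), ConstantInt::get(I32, 3) };
    Constant *GEP = ConstantExpr::getGetElementPtr(G, Idxs, 2);
    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GEP))
      if (Constant *Folded = ConstantFoldConstantExpression(CE, &TD))
        GEP = Folded;  // equivalent GEP, now with pointer-sized indices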
diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp
index 4ba837a..258f1db 100644
--- a/lib/Analysis/DebugInfo.cpp
+++ b/lib/Analysis/DebugInfo.cpp
@@ -725,6 +725,29 @@ DIBasicType DIFactory::CreateBasicTypeEx(DIDescriptor Context,
   return DIBasicType(MDNode::get(VMContext, &Elts[0], 10));
 }

+/// CreateArtificialType - Create a new DIType with "artificial" flag set.
+DIType DIFactory::CreateArtificialType(DIType Ty) {
+  if (Ty.isArtificial())
+    return Ty;
+
+  SmallVector<Value *, 9> Elts;
+  MDNode *N = Ty.getNode();
+  assert (N && "Unexpected input DIType!");
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    if (Value *V = N->getOperand(i))
+      Elts.push_back(V);
+    else
+      Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext)));
+  }
+
+  unsigned CurFlags = Ty.getFlags();
+  CurFlags = CurFlags | DIType::FlagArtificial;
+
+  // Flags are stored at this slot.
+  Elts[8] = ConstantInt::get(Type::getInt32Ty(VMContext), CurFlags);
+
+  return DIType(MDNode::get(VMContext, Elts.data(), Elts.size()));
+}

 /// CreateDerivedType - Create a derived type like const qualified type,
 /// pointer, typedef, etc.
@@ -794,7 +817,8 @@ DICompositeType DIFactory::CreateCompositeType(unsigned Tag,
                                                unsigned Flags,
                                                DIType DerivedFrom,
                                                DIArray Elements,
-                                               unsigned RuntimeLang) {
+                                               unsigned RuntimeLang,
+                                               MDNode *ContainingType) {

   Value *Elts[] = {
     GetTagConstant(Tag),
@@ -808,9 +832,10 @@ DICompositeType DIFactory::CreateCompositeType(unsigned Tag,
     ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
     DerivedFrom.getNode(),
     Elements.getNode(),
-    ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang)
+    ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang),
+    ContainingType
   };
-  return DICompositeType(MDNode::get(VMContext, &Elts[0], 12));
+  return DICompositeType(MDNode::get(VMContext, &Elts[0], 13));
 }


@@ -858,7 +883,8 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context,
                                          bool isLocalToUnit,
                                          bool isDefinition,
                                          unsigned VK, unsigned VIndex,
-                                         DIType ContainingType) {
+                                         DIType ContainingType,
+                                         bool isArtificial) {

   Value *Elts[] = {
     GetTagConstant(dwarf::DW_TAG_subprogram),
@@ -874,9 +900,10 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context,
     ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
     ConstantInt::get(Type::getInt32Ty(VMContext), (unsigned)VK),
     ConstantInt::get(Type::getInt32Ty(VMContext), VIndex),
-    ContainingType.getNode()
+    ContainingType.getNode(),
+    ConstantInt::get(Type::getInt1Ty(VMContext), isArtificial)
   };
-  return DISubprogram(MDNode::get(VMContext, &Elts[0], 14));
+  return DISubprogram(MDNode::get(VMContext, &Elts[0], 15));
 }

 /// CreateSubprogramDefinition - Create new subprogram descriptor for the
@@ -900,9 +927,10 @@ DISubprogram DIFactory::CreateSubprogramDefinition(DISubprogram &SPDeclaration)
     ConstantInt::get(Type::getInt1Ty(VMContext), true),
     DeclNode->getOperand(11), // Virtuality
     DeclNode->getOperand(12), // VIndex
-    DeclNode->getOperand(13)  // Containting Type
+    DeclNode->getOperand(13), // Containing Type
+    DeclNode->getOperand(14)  // isArtificial
   };
-  return DISubprogram(MDNode::get(VMContext, &Elts[0], 14));
+  return DISubprogram(MDNode::get(VMContext, &Elts[0], 15));
 }

 /// CreateGlobalVariable - Create a new descriptor for the specified global.
@@ -1033,6 +1061,8 @@ DILocation DIFactory::CreateLocation(unsigned LineNo, unsigned ColumnNo,
 /// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call.
 Instruction *DIFactory::InsertDeclare(Value *Storage, DIVariable D,
                                       Instruction *InsertBefore) {
+  assert(Storage && "no storage passed to dbg.declare");
+  assert(D.getNode() && "empty DIVariable passed to dbg.declare");
   if (!DeclareFn)
     DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);

@@ -1044,19 +1074,27 @@ Instruction *DIFactory::InsertDeclare(Value *Storage, DIVariable D,
 /// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call.
 Instruction *DIFactory::InsertDeclare(Value *Storage, DIVariable D,
                                       BasicBlock *InsertAtEnd) {
+  assert(Storage && "no storage passed to dbg.declare");
+  assert(D.getNode() && "empty DIVariable passed to dbg.declare");
   if (!DeclareFn)
     DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);

   Value *Args[] = { MDNode::get(Storage->getContext(), &Storage, 1),
                     D.getNode() };
-  return CallInst::Create(DeclareFn, Args, Args+2, "", InsertAtEnd);
-}
+
+  // If this block already has a terminator then insert this intrinsic
+  // before the terminator.
+  if (TerminatorInst *T = InsertAtEnd->getTerminator())
+    return CallInst::Create(DeclareFn, Args, Args+2, "", T);
+  else
+    return CallInst::Create(DeclareFn, Args, Args+2, "", InsertAtEnd);
+}

 /// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
 Instruction *DIFactory::InsertDbgValueIntrinsic(Value *V, uint64_t Offset,
                                                 DIVariable D,
                                                 Instruction *InsertBefore) {
   assert(V && "no value passed to dbg.value");
+  assert(D.getNode() && "empty DIVariable passed to dbg.value");
   if (!ValueFn)
     ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);

@@ -1071,6 +1109,7 @@ Instruction *DIFactory::InsertDbgValueIntrinsic(Value *V, uint64_t Offset,
                                                 DIVariable D,
                                                 BasicBlock *InsertAtEnd) {
   assert(V && "no value passed to dbg.value");
+  assert(D.getNode() && "empty DIVariable passed to dbg.value");
   if (!ValueFn)
     ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
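CreateArtificialType copies the type's operands verbatim and ORs DIType::FlagArtificial into the flags slot, so applying it twice is a no-op. A sketch of the intended use from a frontend, with assumed names M (the current Module) and ThisTy (an already-built DIType, e.g. for an implicit C++ 'this' parameter); illustrative only, not code from this patch:

    DIFactory DIF(M);
    // Mark the implicit parameter's type as compiler-generated so that
    // debuggers can de-emphasize or hide it.
    DIType ArtTy = DIF.CreateArtificialType(ThisTy);
    assert(ArtTy.isArtificial() && "flag is stored in operand slot 8");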
diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp
index e803a48..ec94bc8 100644
--- a/lib/Analysis/IPA/GlobalsModRef.cpp
+++ b/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -486,7 +486,7 @@ GlobalsModRef::alias(const Value *V1, unsigned V1Size,
   if (GV1 && !NonAddressTakenGlobals.count(GV1)) GV1 = 0;
   if (GV2 && !NonAddressTakenGlobals.count(GV2)) GV2 = 0;

-  // If the the two pointers are derived from two different non-addr-taken
+  // If the two pointers are derived from two different non-addr-taken
   // globals, or if one is and the other isn't, we know these can't alias.
   if ((GV1 || GV2) && GV1 != GV2)
     return NoAlias;
diff --git a/lib/Analysis/IPA/Makefile b/lib/Analysis/IPA/Makefile
index da719ba..b850c9f 100644
--- a/lib/Analysis/IPA/Makefile
+++ b/lib/Analysis/IPA/Makefile
@@ -10,7 +10,6 @@
 LEVEL = ../../..
 LIBRARYNAME = LLVMipa
 BUILD_ARCHIVE = 1
-CXXFLAGS = -fno-rtti

 include $(LEVEL)/Makefile.common
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index 38611cc..4ce6868 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -21,6 +21,7 @@
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Assembly/AsmAnnotationWriter.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
@@ -35,42 +36,30 @@ Pass *llvm::createIVUsersPass() {
   return new IVUsers();
 }

-/// containsAddRecFromDifferentLoop - Determine whether expression S involves a
-/// subexpression that is an AddRec from a loop other than L.  An outer loop
-/// of L is OK, but not an inner loop nor a disjoint loop.
-static bool containsAddRecFromDifferentLoop(const SCEV *S, Loop *L) {
-  // This is very common, put it first.
-  if (isa<SCEVConstant>(S))
-    return false;
-  if (const SCEVCommutativeExpr *AE = dyn_cast<SCEVCommutativeExpr>(S)) {
-    for (unsigned int i=0; i< AE->getNumOperands(); i++)
-      if (containsAddRecFromDifferentLoop(AE->getOperand(i), L))
-        return true;
-    return false;
-  }
-  if (const SCEVAddRecExpr *AE = dyn_cast<SCEVAddRecExpr>(S)) {
-    if (const Loop *newLoop = AE->getLoop()) {
-      if (newLoop == L)
-        return false;
-      // if newLoop is an outer loop of L, this is OK.
-      if (newLoop->contains(L))
-        return false;
+/// CollectSubexprs - Split S into subexpressions which can be pulled out into
+/// separate registers.
+static void CollectSubexprs(const SCEV *S,
+                            SmallVectorImpl<const SCEV *> &Ops,
+                            ScalarEvolution &SE) {
+  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+    // Break out add operands.
+    for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+         I != E; ++I)
+      CollectSubexprs(*I, Ops, SE);
+    return;
+  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+    // Split a non-zero base out of an addrec.
+    if (!AR->getStart()->isZero()) {
+      CollectSubexprs(AR->getStart(), Ops, SE);
+      CollectSubexprs(SE.getAddRecExpr(SE.getIntegerSCEV(0, AR->getType()),
+                                       AR->getStepRecurrence(SE),
+                                       AR->getLoop()), Ops, SE);
+      return;
     }
-  }
-  if (const SCEVUDivExpr *DE = dyn_cast<SCEVUDivExpr>(S))
-    return containsAddRecFromDifferentLoop(DE->getLHS(), L) ||
-           containsAddRecFromDifferentLoop(DE->getRHS(), L);
-#if 0
-  // SCEVSDivExpr has been backed out temporarily, but will be back; we'll
-  // need this when it is.
-  if (const SCEVSDivExpr *DE = dyn_cast<SCEVSDivExpr>(S))
-    return containsAddRecFromDifferentLoop(DE->getLHS(), L) ||
-           containsAddRecFromDifferentLoop(DE->getRHS(), L);
-#endif
-  if (const SCEVCastExpr *CE = dyn_cast<SCEVCastExpr>(S))
-    return containsAddRecFromDifferentLoop(CE->getOperand(), L);
-  return false;
+
+  // Otherwise use the value itself.
+  Ops.push_back(S);
 }

 /// getSCEVStartAndStride - Compute the start and stride of this expression,
@@ -89,35 +78,42 @@ static bool getSCEVStartAndStride(const SCEV *&SH, Loop *L, Loop *UseLoop,
   if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(SH)) {
     for (unsigned i = 0, e = AE->getNumOperands(); i != e; ++i)
       if (const SCEVAddRecExpr *AddRec =
-              dyn_cast<SCEVAddRecExpr>(AE->getOperand(i))) {
-        if (AddRec->getLoop() == L)
-          TheAddRec = SE->getAddExpr(AddRec, TheAddRec);
-        else
-          return false;  // Nested IV of some sort?
-      } else {
+              dyn_cast<SCEVAddRecExpr>(AE->getOperand(i)))
+        TheAddRec = SE->getAddExpr(AddRec, TheAddRec);
+      else
         Start = SE->getAddExpr(Start, AE->getOperand(i));
-      }
   } else if (isa<SCEVAddRecExpr>(SH)) {
     TheAddRec = SH;
   } else {
     return false;  // not analyzable.
   }

-  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(TheAddRec);
-  if (!AddRec || AddRec->getLoop() != L) return false;
+  // Break down TheAddRec into its component parts.
+  SmallVector<const SCEV *, 4> Subexprs;
+  CollectSubexprs(TheAddRec, Subexprs, *SE);
+
+  // Look for an addrec on the current loop among the parts.
+  const SCEV *AddRecStride = 0;
+  for (SmallVectorImpl<const SCEV *>::iterator I = Subexprs.begin(),
+       E = Subexprs.end(); I != E; ++I) {
+    const SCEV *S = *I;
+    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
+      if (AR->getLoop() == L) {
+        *I = AR->getStart();
+        AddRecStride = AR->getStepRecurrence(*SE);
+        break;
+      }
+  }
+  if (!AddRecStride)
+    return false;
+
+  // Add up everything else into a start value (which may not be
+  // loop-invariant).
+  const SCEV *AddRecStart = SE->getAddExpr(Subexprs);

   // Use getSCEVAtScope to attempt to simplify other loops out of
   // the picture.
-  const SCEV *AddRecStart = AddRec->getStart();
   AddRecStart = SE->getSCEVAtScope(AddRecStart, UseLoop);
-  const SCEV *AddRecStride = AddRec->getStepRecurrence(*SE);
-
-  // FIXME: If Start contains an SCEVAddRecExpr from a different loop, other
-  // than an outer loop of the current loop, reject it.  LSR has no concept of
-  // operating on more than one loop at a time so don't confuse it with such
-  // expressions.
-  if (containsAddRecFromDifferentLoop(AddRecStart, L))
-    return false;

   Start = SE->getAddExpr(Start, AddRecStart);

@@ -130,7 +126,7 @@ static bool getSCEVStartAndStride(const SCEV *&SH, Loop *L, Loop *UseLoop,
     DEBUG(dbgs() << "[";
           WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false);
-          dbgs() << "] Variable stride: " << *AddRec << "\n");
+          dbgs() << "] Variable stride: " << *AddRecStride << "\n");
   }

   Stride = AddRecStride;
@@ -246,14 +242,6 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
   }

   if (AddUserToIVUsers) {
-    IVUsersOfOneStride *StrideUses = IVUsesByStride[Stride];
-    if (!StrideUses) {    // First occurrence of this stride?
-      StrideOrder.push_back(Stride);
-      StrideUses = new IVUsersOfOneStride(Stride);
-      IVUses.push_back(StrideUses);
-      IVUsesByStride[Stride] = StrideUses;
-    }
-
     // Okay, we found a user that we cannot reduce.  Analyze the instruction
     // and decide what to do with it.  If we are a use inside of the loop, use
     // the value before incrementation, otherwise use it after incrementation.
@@ -261,27 +249,21 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
       // The value used will be incremented by the stride more than we are
       // expecting, so subtract this off.
       const SCEV *NewStart = SE->getMinusSCEV(Start, Stride);
-      StrideUses->addUser(NewStart, User, I);
-      StrideUses->Users.back().setIsUseOfPostIncrementedValue(true);
+      IVUses.push_back(new IVStrideUse(this, Stride, NewStart, User, I));
+      IVUses.back().setIsUseOfPostIncrementedValue(true);
       DEBUG(dbgs() << "   USING POSTINC SCEV, START=" << *NewStart<< "\n");
     } else {
-      StrideUses->addUser(Start, User, I);
+      IVUses.push_back(new IVStrideUse(this, Stride, Start, User, I));
     }
   }
   return true;
 }

-void IVUsers::AddUser(const SCEV *Stride, const SCEV *Offset,
-                      Instruction *User, Value *Operand) {
-  IVUsersOfOneStride *StrideUses = IVUsesByStride[Stride];
-  if (!StrideUses) {    // First occurrence of this stride?
-    StrideOrder.push_back(Stride);
-    StrideUses = new IVUsersOfOneStride(Stride);
-    IVUses.push_back(StrideUses);
-    IVUsesByStride[Stride] = StrideUses;
-  }
-  IVUsesByStride[Stride]->addUser(Offset, User, Operand);
+IVStrideUse &IVUsers::AddUser(const SCEV *Stride, const SCEV *Offset,
+                              Instruction *User, Value *Operand) {
+  IVUses.push_back(new IVStrideUse(this, Stride, Offset, User, Operand));
+  return IVUses.back();
 }

 IVUsers::IVUsers()
@@ -315,15 +297,15 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
 /// value of the OperandValToReplace of the given IVStrideUse.
 const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &U) const {
   // Start with zero.
-  const SCEV *RetVal = SE->getIntegerSCEV(0, U.getParent()->Stride->getType());
+  const SCEV *RetVal = SE->getIntegerSCEV(0, U.getStride()->getType());
   // Create the basic add recurrence.
-  RetVal = SE->getAddRecExpr(RetVal, U.getParent()->Stride, L);
+  RetVal = SE->getAddRecExpr(RetVal, U.getStride(), L);
   // Add the offset in a separate step, because it may be loop-variant.
   RetVal = SE->getAddExpr(RetVal, U.getOffset());
   // For uses of post-incremented values, add an extra stride to compute
   // the actual replacement value.
   if (U.isUseOfPostIncrementedValue())
-    RetVal = SE->getAddExpr(RetVal, U.getParent()->Stride);
+    RetVal = SE->getAddExpr(RetVal, U.getStride());
   return RetVal;
 }

@@ -332,9 +314,9 @@ const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &U) const {
 /// isUseOfPostIncrementedValue flag.
 const SCEV *IVUsers::getCanonicalExpr(const IVStrideUse &U) const {
   // Start with zero.
-  const SCEV *RetVal = SE->getIntegerSCEV(0, U.getParent()->Stride->getType());
+  const SCEV *RetVal = SE->getIntegerSCEV(0, U.getStride()->getType());
   // Create the basic add recurrence.
-  RetVal = SE->getAddRecExpr(RetVal, U.getParent()->Stride, L);
+  RetVal = SE->getAddRecExpr(RetVal, U.getStride(), L);
   // Add the offset in a separate step, because it may be loop-variant.
   RetVal = SE->getAddExpr(RetVal, U.getOffset());
   return RetVal;
@@ -349,24 +331,20 @@ void IVUsers::print(raw_ostream &OS, const Module *M) const {
   }
   OS << ":\n";

-  for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e; ++Stride) {
-    std::map<const SCEV *, IVUsersOfOneStride*>::const_iterator SI =
-      IVUsesByStride.find(StrideOrder[Stride]);
-    assert(SI != IVUsesByStride.end() && "Stride doesn't exist!");
-    OS << "  Stride " << *SI->first->getType() << " " << *SI->first << ":\n";
-
-    for (ilist<IVStrideUse>::const_iterator UI = SI->second->Users.begin(),
-         E = SI->second->Users.end(); UI != E; ++UI) {
-      OS << "    ";
-      WriteAsOperand(OS, UI->getOperandValToReplace(), false);
-      OS << " = ";
-      OS << *getReplacementExpr(*UI);
-      if (UI->isUseOfPostIncrementedValue())
-        OS << " (post-inc)";
-      OS << " in ";
-      UI->getUser()->print(OS);
-      OS << '\n';
-    }
+  // Use a default AssemblyAnnotationWriter to suppress the default info
+  // comments, which aren't relevant here.
+  AssemblyAnnotationWriter Annotator;
+  for (ilist<IVStrideUse>::const_iterator UI = IVUses.begin(),
+       E = IVUses.end(); UI != E; ++UI) {
+    OS << "  ";
+    WriteAsOperand(OS, UI->getOperandValToReplace(), false);
+    OS << " = "
+       << *getReplacementExpr(*UI);
+    if (UI->isUseOfPostIncrementedValue())
+      OS << " (post-inc)";
+    OS << " in ";
+    UI->getUser()->print(OS, &Annotator);
+    OS << '\n';
   }
 }

@@ -375,14 +353,12 @@ void IVUsers::dump() const {
 }

 void IVUsers::releaseMemory() {
-  IVUsesByStride.clear();
-  StrideOrder.clear();
   Processed.clear();
   IVUses.clear();
 }

 void IVStrideUse::deleted() {
   // Remove this user from the list.
-  Parent->Users.erase(this);
+  Parent->IVUses.erase(this);
   // this now dangles!
 }
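CollectSubexprs flattens an expression into the additive pieces that can be considered as separate registers. An illustrative walk-through of the helper above, with assumed names S (the SCEV being split) and SE (a ScalarEvolution reference); not code from this patch:

    // For S = {(4 + %n),+,2}<%loop>, the non-zero start is split out of the
    // addrec and the add is broken apart, so Parts ends up holding three
    // entries: 4, %n, and {0,+,2}<%loop>.
    SmallVector<const SCEV *, 4> Parts;
    CollectSubexprs(S, Parts, SE);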
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index 651c918..972d034 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -25,26 +25,28 @@ unsigned InlineCostAnalyzer::FunctionInfo::
          CountCodeReductionForConstant(Value *V) {
   unsigned Reduction = 0;
   for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI)
-    if (isa<BranchInst>(*UI))
-      Reduction += 40;          // Eliminating a conditional branch is a big win
-    else if (SwitchInst *SI = dyn_cast<SwitchInst>(*UI))
-      // Eliminating a switch is a big win, proportional to the number of edges
-      // deleted.
-      Reduction += (SI->getNumSuccessors()-1) * 40;
-    else if (isa<IndirectBrInst>(*UI))
-      // Eliminating an indirect branch is a big win.
-      Reduction += 200;
-    else if (CallInst *CI = dyn_cast<CallInst>(*UI)) {
+    if (isa<BranchInst>(*UI) || isa<SwitchInst>(*UI)) {
+      // We will be able to eliminate all but one of the successors.
+      const TerminatorInst &TI = cast<TerminatorInst>(**UI);
+      const unsigned NumSucc = TI.getNumSuccessors();
+      unsigned Instrs = 0;
+      for (unsigned I = 0; I != NumSucc; ++I)
+        Instrs += TI.getSuccessor(I)->size();
+      // We don't know which blocks will be eliminated, so use the average size.
+      Reduction += InlineConstants::InstrCost*Instrs*(NumSucc-1)/NumSucc;
+    } else if (CallInst *CI = dyn_cast<CallInst>(*UI)) {
       // Turning an indirect call into a direct call is a BIG win
-      Reduction += CI->getCalledValue() == V ? 500 : 0;
+      if (CI->getCalledValue() == V)
+        Reduction += InlineConstants::IndirectCallBonus;
     } else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI)) {
       // Turning an indirect call into a direct call is a BIG win
-      Reduction += II->getCalledValue() == V ? 500 : 0;
+      if (II->getCalledValue() == V)
+        Reduction += InlineConstants::IndirectCallBonus;
     } else {
       // Figure out if this instruction will be removed due to simple constant
       // propagation.
       Instruction &Inst = cast<Instruction>(**UI);
-
+
       // We can't constant propagate instructions which have effects or
       // read memory.
       //
@@ -53,7 +55,7 @@ unsigned InlineCostAnalyzer::FunctionInfo::
       // Unfortunately, we don't know the pointer that may get propagated here,
       // so we can't make this decision.
       if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() ||
-          isa<AllocaInst>(Inst)) 
+          isa<AllocaInst>(Inst))
         continue;

       bool AllOperandsConstant = true;
@@ -65,7 +67,7 @@ unsigned InlineCostAnalyzer::FunctionInfo::

       if (AllOperandsConstant) {
         // We will get to remove this instruction...
-        Reduction += 7;
+        Reduction += InlineConstants::InstrCost;

         // And any other instructions that use it which become constants
         // themselves.
@@ -87,11 +89,14 @@ unsigned InlineCostAnalyzer::FunctionInfo::
   for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
     Instruction *I = cast<Instruction>(*UI);
     if (isa<LoadInst>(I) || isa<StoreInst>(I))
-      Reduction += 10;
+      Reduction += InlineConstants::InstrCost;
     else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
       // If the GEP has variable indices, we won't be able to do much with it.
-      if (!GEP->hasAllConstantIndices())
-        Reduction += CountCodeReductionForAlloca(GEP)+15;
+      if (GEP->hasAllConstantIndices())
+        Reduction += CountCodeReductionForAlloca(GEP);
+    } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) {
+      // Track pointer through bitcasts.
+      Reduction += CountCodeReductionForAlloca(BCI);
     } else {
       // If there is some other strange instruction, we're not going to be able
       // to do much if we inline this.
@@ -158,10 +163,11 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
           (F->getName() == "setjmp" || F->getName() == "_setjmp"))
         NeverInline = true;

-      // Calls often compile into many machine instructions.  Bump up their
-      // cost to reflect this.
-      if (!isa<IntrinsicInst>(II) && !callIsSmall(CS.getCalledFunction()))
-        NumInsts += InlineConstants::CallPenalty;
+      if (!isa<IntrinsicInst>(II) && !callIsSmall(CS.getCalledFunction())) {
+        // Each argument to a call takes on average one instruction to set up.
+        NumInsts += CS.arg_size();
+        ++NumCalls;
+      }
     }

     if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
@@ -223,8 +229,14 @@ void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) {
   if (Metrics.NumRets==1)
     --Metrics.NumInsts;

+  // Don't bother calculating argument weights if we are never going to inline
+  // the function anyway.
+  if (Metrics.NeverInline)
+    return;
+
   // Check out all of the arguments to the function, figuring out how much
   // code can be eliminated if one of the arguments is a constant.
+  ArgumentWeights.reserve(F->arg_size());
   for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I)
     ArgumentWeights.push_back(ArgInfo(CountCodeReductionForConstant(I),
                                       CountCodeReductionForAlloca(I)));
@@ -313,23 +325,18 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
   for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
        I != E; ++I, ++ArgNo) {
     // Each argument passed in has a cost at both the caller and the callee
-    // sides.  This favors functions that take many arguments over functions
-    // that take few arguments.
-    InlineCost -= 20;
-
-    // If this is a function being passed in, it is very likely that we will be
-    // able to turn an indirect function call into a direct function call.
-    if (isa<Function>(I))
-      InlineCost -= 100;
-
+    // sides.  Measurements show that each argument costs about the same as an
+    // instruction.
+    InlineCost -= InlineConstants::InstrCost;
+
     // If an alloca is passed in, inlining this function is likely to allow
     // significant future optimization possibilities (like scalar promotion, and
     // scalarization), so encourage the inlining of the function.
     //
-    else if (isa<AllocaInst>(I)) {
+    if (isa<AllocaInst>(I)) {
       if (ArgNo < CalleeFI.ArgumentWeights.size())
         InlineCost -= CalleeFI.ArgumentWeights[ArgNo].AllocaWeight;
-
+
     // If this is a constant being passed into the function, use the argument
     // weights calculated for the callee to determine how much will be folded
     // away with this information.
@@ -341,14 +348,17 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,

   // Now that we have considered all of the factors that make the call site more
   // likely to be inlined, look at factors that make us not want to inline it.
-
+
+  // Calls usually take a long time, so they make the inlining gain smaller.
+  InlineCost += CalleeFI.Metrics.NumCalls * InlineConstants::CallPenalty;
+
   // Don't inline into something too big, which would make it bigger.
   // "size" here is the number of basic blocks, not instructions.
   //
   InlineCost += Caller->size()/15;

   // Look at the size of the callee.  Each instruction counts as 5.
-  InlineCost += CalleeFI.Metrics.NumInsts*5;
+  InlineCost += CalleeFI.Metrics.NumInsts*InlineConstants::InstrCost;

   return llvm::InlineCost::get(InlineCost);
 }
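The rewritten branch heuristic prices eliminated code in instructions rather than fixed magic numbers: when a constant argument decides an N-way branch, all but one successor dies on average. Worked numbers, assuming InlineConstants::InstrCost has the value of 5 that the nearby "counts as 5" comment suggests; illustrative only:

    // A two-way branch whose successors hold 12 and 8 instructions.
    unsigned Instrs = 12 + 8;
    unsigned NumSucc = 2;
    unsigned Reduction =
      InlineConstants::InstrCost * Instrs * (NumSucc - 1) / NumSucc;  // 50,
    // i.e. half of the 20 successor instructions are expected to fold away.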
diff --git a/lib/Analysis/LiveValues.cpp b/lib/Analysis/LiveValues.cpp
index 02ec7d3..1b91d93 100644
--- a/lib/Analysis/LiveValues.cpp
+++ b/lib/Analysis/LiveValues.cpp
@@ -184,7 +184,7 @@ LiveValues::Memo &LiveValues::compute(const Value *V) {
     }
   }

-  // If the value was never used outside the the block in which it was
+  // If the value was never used outside the block in which it was
   // defined, it's killed in that block.
   if (!LiveOutOfDefBB)
     M.Killed.insert(DefBB);
diff --git a/lib/Analysis/Makefile b/lib/Analysis/Makefile
index f61b8aa..4af6d35 100644
--- a/lib/Analysis/Makefile
+++ b/lib/Analysis/Makefile
@@ -11,7 +11,6 @@
 LEVEL = ../..
 LIBRARYNAME = LLVMAnalysis
 DIRS = IPA
 BUILD_ARCHIVE = 1
-CXXFLAGS = -fno-rtti

 include $(LEVEL)/Makefile.common
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
index b448628..297b588 100644
--- a/lib/Analysis/MemoryBuiltins.cpp
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -24,7 +24,7 @@ using namespace llvm;
 //  malloc Call Utility Functions.
 //

-/// isMalloc - Returns true if the the value is either a malloc call or a
+/// isMalloc - Returns true if the value is either a malloc call or a
 /// bitcast of the result of a malloc call.
 bool llvm::isMalloc(const Value *I) {
   return extractMallocCall(I) || extractMallocCallFromBitCast(I);
@@ -183,7 +183,7 @@ Value *llvm::getMallocArraySize(CallInst *CI, const TargetData *TD,
 //  free Call Utility Functions.
 //

-/// isFreeCall - Returns true if the the value is a call to the builtin free()
+/// isFreeCall - Returns true if the value is a call to the builtin free()
 bool llvm::isFreeCall(const Value *I) {
   const CallInst *CI = dyn_cast<CallInst>(I);
   if (!CI)
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 2f44913..9ee7d3a 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -214,8 +214,8 @@ bool SCEVCastExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const {
 SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeID &ID,
                                    const SCEV *op, const Type *ty)
   : SCEVCastExpr(ID, scTruncate, op, ty) {
-  assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) &&
-         (Ty->isInteger() || isa<PointerType>(Ty)) &&
+  assert((Op->getType()->isIntegerTy() || isa<PointerType>(Op->getType())) &&
+         (Ty->isIntegerTy() || isa<PointerType>(Ty)) &&
          "Cannot truncate non-integer value!");
 }

@@ -226,8 +226,8 @@ void SCEVTruncateExpr::print(raw_ostream &OS) const {
 SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeID &ID,
                                        const SCEV *op, const Type *ty)
   : SCEVCastExpr(ID, scZeroExtend, op, ty) {
-  assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) &&
-         (Ty->isInteger() || isa<PointerType>(Ty)) &&
+  assert((Op->getType()->isIntegerTy() || isa<PointerType>(Op->getType())) &&
+         (Ty->isIntegerTy() || isa<PointerType>(Ty)) &&
          "Cannot zero extend non-integer value!");
 }

@@ -238,8 +238,8 @@ void SCEVZeroExtendExpr::print(raw_ostream &OS) const {
 SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeID &ID,
                                        const SCEV *op, const Type *ty)
   : SCEVCastExpr(ID, scSignExtend, op, ty) {
-  assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) &&
-         (Ty->isInteger() || isa<PointerType>(Ty)) &&
+  assert((Op->getType()->isIntegerTy() || isa<PointerType>(Op->getType())) &&
+         (Ty->isIntegerTy() || isa<PointerType>(Ty)) &&
          "Cannot sign extend non-integer value!");
 }

@@ -312,6 +312,21 @@ bool SCEVAddRecExpr::isLoopInvariant(const Loop *QueryLoop) const {
   return true;
 }

+bool
+SCEVAddRecExpr::dominates(BasicBlock *BB, DominatorTree *DT) const {
+  return DT->dominates(L->getHeader(), BB) &&
+         SCEVNAryExpr::dominates(BB, DT);
+}
+
+bool
+SCEVAddRecExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const {
+  // This uses a "dominates" query instead of "properly dominates" query because
+  // the instruction which produces the addrec's value is a PHI, and a PHI
+  // effectively properly dominates its entire containing block.
+  return DT->dominates(L->getHeader(), BB) &&
+         SCEVNAryExpr::properlyDominates(BB, DT);
+}
+
 void SCEVAddRecExpr::print(raw_ostream &OS) const {
   OS << "{" << *Operands[0];
   for (unsigned i = 1, e = Operands.size(); i != e; ++i)
@@ -321,15 +336,6 @@ void SCEVAddRecExpr::print(raw_ostream &OS) const {
   OS << ">";
 }

-void SCEVFieldOffsetExpr::print(raw_ostream &OS) const {
-  // LLVM struct fields don't have names, so just print the field number.
-  OS << "offsetof(" << *STy << ", " << FieldNo << ")";
-}
-
-void SCEVAllocSizeExpr::print(raw_ostream &OS) const {
-  OS << "sizeof(" << *AllocTy << ")";
-}
-
 bool SCEVUnknown::isLoopInvariant(const Loop *L) const {
   // All non-instruction values are loop invariant.  All instructions are loop
   // invariant if they are not contained in the specified loop.
@@ -356,7 +362,91 @@ const Type *SCEVUnknown::getType() const {
   return V->getType();
 }

+bool SCEVUnknown::isSizeOf(const Type *&AllocTy) const {
+  if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(V))
+    if (VCE->getOpcode() == Instruction::PtrToInt)
+      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
+        if (CE->getOpcode() == Instruction::GetElementPtr &&
+            CE->getOperand(0)->isNullValue() &&
+            CE->getNumOperands() == 2)
+          if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(1)))
+            if (CI->isOne()) {
+              AllocTy = cast<PointerType>(CE->getOperand(0)->getType())
+                                 ->getElementType();
+              return true;
+            }
+
+  return false;
+}
+
+bool SCEVUnknown::isAlignOf(const Type *&AllocTy) const {
+  if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(V))
+    if (VCE->getOpcode() == Instruction::PtrToInt)
+      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
+        if (CE->getOpcode() == Instruction::GetElementPtr &&
+            CE->getOperand(0)->isNullValue()) {
+          const Type *Ty =
+            cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
+          if (const StructType *STy = dyn_cast<StructType>(Ty))
+            if (!STy->isPacked() &&
+                CE->getNumOperands() == 3 &&
+                CE->getOperand(1)->isNullValue()) {
+              if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(2)))
+                if (CI->isOne() &&
+                    STy->getNumElements() == 2 &&
+                    STy->getElementType(0)->isIntegerTy(1)) {
+                  AllocTy = STy->getElementType(1);
+                  return true;
+                }
+            }
+        }
+
+  return false;
+}
+
+bool SCEVUnknown::isOffsetOf(const Type *&CTy, Constant *&FieldNo) const {
+  if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(V))
+    if (VCE->getOpcode() == Instruction::PtrToInt)
+      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
+        if (CE->getOpcode() == Instruction::GetElementPtr &&
+            CE->getNumOperands() == 3 &&
+            CE->getOperand(0)->isNullValue() &&
+            CE->getOperand(1)->isNullValue()) {
+          const Type *Ty =
+            cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
+          // Ignore vector types here so that ScalarEvolutionExpander doesn't
+          // emit getelementptrs that index into vectors.
+          if (isa<StructType>(Ty) || isa<ArrayType>(Ty)) {
+            CTy = Ty;
+            FieldNo = CE->getOperand(2);
+            return true;
+          }
+        }
+
+  return false;
+}
+
 void SCEVUnknown::print(raw_ostream &OS) const {
+  const Type *AllocTy;
+  if (isSizeOf(AllocTy)) {
+    OS << "sizeof(" << *AllocTy << ")";
+    return;
+  }
+  if (isAlignOf(AllocTy)) {
+    OS << "alignof(" << *AllocTy << ")";
+    return;
+  }
+
+  const Type *CTy;
+  Constant *FieldNo;
+  if (isOffsetOf(CTy, FieldNo)) {
+    OS << "offsetof(" << *CTy << ", ";
+    WriteAsOperand(OS, FieldNo, false);
+    OS << ")";
+    return;
+  }
+
+  // Otherwise just print it normally.
   WriteAsOperand(OS, V, false);
 }

@@ -515,21 +605,6 @@ namespace {
         return operator()(LC->getOperand(), RC->getOperand());
       }

-      // Compare offsetof expressions.
-      if (const SCEVFieldOffsetExpr *LA = dyn_cast<SCEVFieldOffsetExpr>(LHS)) {
-        const SCEVFieldOffsetExpr *RA = cast<SCEVFieldOffsetExpr>(RHS);
-        if (CompareTypes(LA->getStructType(), RA->getStructType()) ||
-            CompareTypes(RA->getStructType(), LA->getStructType()))
-          return CompareTypes(LA->getStructType(), RA->getStructType());
-        return LA->getFieldNo() < RA->getFieldNo();
-      }
-
-      // Compare sizeof expressions by the allocation type.
-      if (const SCEVAllocSizeExpr *LA = dyn_cast<SCEVAllocSizeExpr>(LHS)) {
-        const SCEVAllocSizeExpr *RA = cast<SCEVAllocSizeExpr>(RHS);
-        return CompareTypes(LA->getAllocType(), RA->getAllocType());
-      }
-
       llvm_unreachable("Unknown SCEV kind!");
       return false;
     }
@@ -2172,74 +2247,38 @@ const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
   return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
 }

-const SCEV *ScalarEvolution::getFieldOffsetExpr(const StructType *STy,
-                                                unsigned FieldNo) {
-  // If we have TargetData we can determine the constant offset.
-  if (TD) {
-    const Type *IntPtrTy = TD->getIntPtrType(getContext());
-    const StructLayout &SL = *TD->getStructLayout(STy);
-    uint64_t Offset = SL.getElementOffset(FieldNo);
-    return getIntegerSCEV(Offset, IntPtrTy);
-  }
+const SCEV *ScalarEvolution::getSizeOfExpr(const Type *AllocTy) {
+  Constant *C = ConstantExpr::getSizeOf(AllocTy);
+  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+    C = ConstantFoldConstantExpression(CE, TD);
+  const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
+  return getTruncateOrZeroExtend(getSCEV(C), Ty);
+}

-  // Field 0 is always at offset 0.
-  if (FieldNo == 0) {
-    const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy));
-    return getIntegerSCEV(0, Ty);
-  }
+const SCEV *ScalarEvolution::getAlignOfExpr(const Type *AllocTy) {
+  Constant *C = ConstantExpr::getAlignOf(AllocTy);
+  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+    C = ConstantFoldConstantExpression(CE, TD);
+  const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
+  return getTruncateOrZeroExtend(getSCEV(C), Ty);
+}

-  // Okay, it looks like we really DO need an offsetof expr.  Check to see if we
-  // already have one, otherwise create a new one.
-  FoldingSetNodeID ID;
-  ID.AddInteger(scFieldOffset);
-  ID.AddPointer(STy);
-  ID.AddInteger(FieldNo);
-  void *IP = 0;
-  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
-  SCEV *S = SCEVAllocator.Allocate<SCEVFieldOffsetExpr>();
+const SCEV *ScalarEvolution::getOffsetOfExpr(const StructType *STy,
+                                             unsigned FieldNo) {
+  Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo);
+  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+    C = ConstantFoldConstantExpression(CE, TD);
   const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy));
-  new (S) SCEVFieldOffsetExpr(ID, Ty, STy, FieldNo);
-  UniqueSCEVs.InsertNode(S, IP);
-  return S;
+  return getTruncateOrZeroExtend(getSCEV(C), Ty);
 }

-const SCEV *ScalarEvolution::getAllocSizeExpr(const Type *AllocTy) {
-  // If we have TargetData we can determine the constant size.
-  if (TD && AllocTy->isSized()) {
-    const Type *IntPtrTy = TD->getIntPtrType(getContext());
-    return getIntegerSCEV(TD->getTypeAllocSize(AllocTy), IntPtrTy);
-  }
-
-  // Expand an array size into the element size times the number
-  // of elements.
-  if (const ArrayType *ATy = dyn_cast<ArrayType>(AllocTy)) {
-    const SCEV *E = getAllocSizeExpr(ATy->getElementType());
-    return getMulExpr(
-      E, getConstant(ConstantInt::get(cast<IntegerType>(E->getType()),
-                                      ATy->getNumElements())));
-  }
-
-  // Expand a vector size into the element size times the number
-  // of elements.
-  if (const VectorType *VTy = dyn_cast<VectorType>(AllocTy)) {
-    const SCEV *E = getAllocSizeExpr(VTy->getElementType());
-    return getMulExpr(
-      E, getConstant(ConstantInt::get(cast<IntegerType>(E->getType()),
-                                      VTy->getNumElements())));
-  }
-
-  // Okay, it looks like we really DO need a sizeof expr.  Check to see if we
-  // already have one, otherwise create a new one.
-  FoldingSetNodeID ID;
-  ID.AddInteger(scAllocSize);
-  ID.AddPointer(AllocTy);
-  void *IP = 0;
-  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
-  SCEV *S = SCEVAllocator.Allocate<SCEVAllocSizeExpr>();
-  const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
-  new (S) SCEVAllocSizeExpr(ID, Ty, AllocTy);
-  UniqueSCEVs.InsertNode(S, IP);
-  return S;
+const SCEV *ScalarEvolution::getOffsetOfExpr(const Type *CTy,
+                                             Constant *FieldNo) {
+  Constant *C = ConstantExpr::getOffsetOf(CTy, FieldNo);
+  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+    C = ConstantFoldConstantExpression(CE, TD);
+  const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(CTy));
+  return getTruncateOrZeroExtend(getSCEV(C), Ty);
 }
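The replacement for the removed SCEVFieldOffsetExpr/SCEVAllocSizeExpr node kinds is the target-independent constant expression: ConstantExpr::getSizeOf(Ty) denotes, in IR terms,

    ptrtoint (Ty* getelementptr (Ty* null, i32 1) to iN)

which is exactly the shape SCEVUnknown::isSizeOf pattern-matches above. A short sketch with assumed in-scope names SE (a ScalarEvolution reference) and Ty (a Type*); illustrative only:

    const SCEV *Size = SE.getSizeOfExpr(Ty);
    // With TargetData available, ConstantFoldConstantExpression collapses
    // this to a plain integer constant; without it, the SCEV stays symbolic
    // and prints as "sizeof(Ty)".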
 const SCEV *ScalarEvolution::getUnknown(Value *V) {
@@ -2269,7 +2308,7 @@
 /// has access to target-specific information.
 bool ScalarEvolution::isSCEVable(const Type *Ty) const {
   // Integers and pointers are always SCEVable.
-  return Ty->isInteger() || isa<PointerType>(Ty);
+  return Ty->isIntegerTy() || isa<PointerType>(Ty);
 }

 /// getTypeSizeInBits - Return the size in bits of the specified type,
@@ -2282,7 +2321,7 @@ uint64_t ScalarEvolution::getTypeSizeInBits(const Type *Ty) const {
     return TD->getTypeSizeInBits(Ty);

   // Integer types have fixed sizes.
-  if (Ty->isInteger())
+  if (Ty->isIntegerTy())
     return Ty->getPrimitiveSizeInBits();

   // The only other supported type is pointer.  Without TargetData, conservatively
@@ -2298,7 +2337,7 @@ uint64_t ScalarEvolution::getTypeSizeInBits(const Type *Ty) const {
 const Type *ScalarEvolution::getEffectiveSCEVType(const Type *Ty) const {
   assert(isSCEVable(Ty) && "Type is not SCEVable!");

-  if (Ty->isInteger())
+  if (Ty->isIntegerTy())
     return Ty;

   // The only other supported type is pointer.
@@ -2327,7 +2366,7 @@ const SCEV *ScalarEvolution::getSCEV(Value *V) {

 /// getIntegerSCEV - Given a SCEVable type, create a constant for the
 /// specified signed integer value and return a SCEV for the constant.
-const SCEV *ScalarEvolution::getIntegerSCEV(int Val, const Type *Ty) {
+const SCEV *ScalarEvolution::getIntegerSCEV(int64_t Val, const Type *Ty) {
   const IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty));
   return getConstant(ConstantInt::get(ITy, Val));
 }
@@ -2373,8 +2412,8 @@ const SCEV *
 ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V,
                                          const Type *Ty) {
   const Type *SrcTy = V->getType();
-  assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) &&
-         (Ty->isInteger() || isa<PointerType>(Ty)) &&
+  assert((SrcTy->isIntegerTy() || isa<PointerType>(SrcTy)) &&
+         (Ty->isIntegerTy() || isa<PointerType>(Ty)) &&
          "Cannot truncate or zero extend with non-integer arguments!");
   if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
     return V;  // No conversion
@@ -2390,8 +2429,8 @@ const SCEV *
 ScalarEvolution::getTruncateOrSignExtend(const SCEV *V,
                                          const Type *Ty) {
   const Type *SrcTy = V->getType();
-  assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) &&
-         (Ty->isInteger() || isa<PointerType>(Ty)) &&
+  assert((SrcTy->isIntegerTy() || isa<PointerType>(SrcTy)) &&
+         (Ty->isIntegerTy() || isa<PointerType>(Ty)) &&
          "Cannot truncate or zero extend with non-integer arguments!");
   if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
     return V;  // No conversion
@@ -2406,8 +2445,8 @@ ScalarEvolution::getTruncateOrSignExtend(const SCEV *V,
 const SCEV *
 ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, const Type *Ty) {
   const Type *SrcTy = V->getType();
-  assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) &&
-         (Ty->isInteger() || isa<PointerType>(Ty)) &&
+  assert((SrcTy->isIntegerTy() || isa<PointerType>(SrcTy)) &&
+         (Ty->isIntegerTy() || isa<PointerType>(Ty)) &&
          "Cannot noop or zero extend with non-integer arguments!");
   assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
          "getNoopOrZeroExtend cannot truncate!");
@@ -2422,8 +2461,8 @@ ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, const Type *Ty) {
 const SCEV *
 ScalarEvolution::getNoopOrSignExtend(const SCEV *V, const Type *Ty) {
   const Type *SrcTy = V->getType();
-  assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) &&
-         (Ty->isInteger() || isa<PointerType>(Ty)) &&
+  assert((SrcTy->isIntegerTy() || isa<PointerType>(SrcTy)) &&
+         (Ty->isIntegerTy() || isa<PointerType>(Ty)) &&
          "Cannot noop or sign extend with non-integer arguments!");
   assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
          "getNoopOrSignExtend cannot truncate!");
@@ -2439,8 +2478,8 @@ ScalarEvolution::getNoopOrSignExtend(const SCEV *V, const Type *Ty) {
 const SCEV *
 ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, const Type *Ty) {
   const Type *SrcTy = V->getType();
-  assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) &&
-         (Ty->isInteger() || isa<PointerType>(Ty)) &&
+  assert((SrcTy->isIntegerTy() || isa<PointerType>(SrcTy)) &&
+         (Ty->isIntegerTy() || isa<PointerType>(Ty)) &&
          "Cannot noop or any extend with non-integer arguments!");
   assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
          "getNoopOrAnyExtend cannot truncate!");
@@ -2454,8 +2493,8 @@ ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, const Type *Ty) {
 const SCEV *
 ScalarEvolution::getTruncateOrNoop(const SCEV *V, const Type *Ty) {
   const Type *SrcTy = V->getType();
-  assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) &&
-         (Ty->isInteger() || isa<PointerType>(Ty)) &&
+  assert((SrcTy->isIntegerTy() || isa<PointerType>(SrcTy)) &&
+         (Ty->isIntegerTy() || isa<PointerType>(Ty)) &&
          "Cannot truncate or noop with non-integer arguments!");
   assert(getTypeSizeInBits(SrcTy) >= getTypeSizeInBits(Ty) &&
          "getTruncateOrNoop cannot extend!");
@@ -2527,7 +2566,7 @@ ScalarEvolution::ForgetSymbolicName(Instruction *I, const SCEV *SymName) {
     if (It != Scalars.end()) {
       // Short-circuit the def-use traversal if the symbolic name
       // ceases to appear in expressions.
-      if (!It->second->hasOperand(SymName))
+      if (It->second != SymName && !It->second->hasOperand(SymName))
         continue;

       // SCEVUnknown for a PHI either means that it has an unrecognized
@@ -2689,16 +2728,15 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
       // For a struct, add the member offset.
       unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
       TotalOffset = getAddExpr(TotalOffset,
-                               getFieldOffsetExpr(STy, FieldNo),
+                               getOffsetOfExpr(STy, FieldNo),
                                /*HasNUW=*/false, /*HasNSW=*/InBounds);
     } else {
       // For an array, add the element offset, explicitly scaled.
       const SCEV *LocalOffset = getSCEV(Index);
-      if (!isa<PointerType>(LocalOffset->getType()))
-        // Getelementptr indicies are signed.
-        LocalOffset = getTruncateOrSignExtend(LocalOffset, IntPtrTy);
+      // Getelementptr indices are signed.
+      LocalOffset = getTruncateOrSignExtend(LocalOffset, IntPtrTy);
       // Lower "inbounds" GEPs to NSW arithmetic.
-      LocalOffset = getMulExpr(LocalOffset, getAllocSizeExpr(*GTI),
+      LocalOffset = getMulExpr(LocalOffset, getSizeOfExpr(*GTI),
                                /*HasNUW=*/false, /*HasNSW=*/InBounds);
       TotalOffset = getAddExpr(TotalOffset, LocalOffset,
                                /*HasNUW=*/false, /*HasNSW=*/InBounds);
@@ -2797,62 +2835,67 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) {
   if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
     return ConstantRange(C->getValue()->getValue());

+  unsigned BitWidth = getTypeSizeInBits(S->getType());
+  ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);
+
+  // If the value has known zeros, the maximum unsigned value will have those
+  // known zeros as well.
+  uint32_t TZ = GetMinTrailingZeros(S);
+  if (TZ != 0)
+    ConservativeResult =
+      ConstantRange(APInt::getMinValue(BitWidth),
+                    APInt::getMaxValue(BitWidth).lshr(TZ).shl(TZ) + 1);
+
   if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
     ConstantRange X = getUnsignedRange(Add->getOperand(0));
     for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
       X = X.add(getUnsignedRange(Add->getOperand(i)));
-    return X;
+    return ConservativeResult.intersectWith(X);
   }

   if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
     ConstantRange X = getUnsignedRange(Mul->getOperand(0));
     for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
       X = X.multiply(getUnsignedRange(Mul->getOperand(i)));
-    return X;
+    return ConservativeResult.intersectWith(X);
   }

   if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
     ConstantRange X = getUnsignedRange(SMax->getOperand(0));
     for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
       X = X.smax(getUnsignedRange(SMax->getOperand(i)));
-    return X;
+    return ConservativeResult.intersectWith(X);
   }

   if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
     ConstantRange X = getUnsignedRange(UMax->getOperand(0));
     for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
       X = X.umax(getUnsignedRange(UMax->getOperand(i)));
-    return X;
+    return ConservativeResult.intersectWith(X);
   }

   if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
     ConstantRange X = getUnsignedRange(UDiv->getLHS());
     ConstantRange Y = getUnsignedRange(UDiv->getRHS());
-    return X.udiv(Y);
+    return ConservativeResult.intersectWith(X.udiv(Y));
   }

   if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
     ConstantRange X = getUnsignedRange(ZExt->getOperand());
-    return X.zeroExtend(cast<IntegerType>(ZExt->getType())->getBitWidth());
+    return ConservativeResult.intersectWith(X.zeroExtend(BitWidth));
   }

   if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
     ConstantRange X = getUnsignedRange(SExt->getOperand());
-    return X.signExtend(cast<IntegerType>(SExt->getType())->getBitWidth());
+    return ConservativeResult.intersectWith(X.signExtend(BitWidth));
   }

   if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
     ConstantRange X = getUnsignedRange(Trunc->getOperand());
-    return X.truncate(cast<IntegerType>(Trunc->getType())->getBitWidth());
+    return ConservativeResult.intersectWith(X.truncate(BitWidth));
   }

-  ConstantRange FullSet(getTypeSizeInBits(S->getType()), true);
-
   if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
-    const SCEV *T = getBackedgeTakenCount(AddRec->getLoop());
-    const SCEVConstant *Trip = dyn_cast<SCEVConstant>(T);
-    ConstantRange ConservativeResult = FullSet;
-
     // If there's no unsigned wrap, the value will never be less than its
     // initial value.
     if (AddRec->hasNoUnsignedWrap())
@@ -2862,10 +2905,11 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) {
           APInt(getTypeSizeInBits(C->getType()), 0));

     // TODO: non-affine addrec
-    if (Trip && AddRec->isAffine()) {
+    if (AddRec->isAffine()) {
       const Type *Ty = AddRec->getType();
       const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
-      if (getTypeSizeInBits(MaxBECount->getType()) <= getTypeSizeInBits(Ty)) {
+      if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
+          getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
         MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);

         const SCEV *Start = AddRec->getStart();
@@ -2883,7 +2927,7 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) {
                            EndRange.getUnsignedMax());
         if (Min.isMinValue() && Max.isMaxValue())
           return ConservativeResult;
-        return ConstantRange(Min, Max+1);
+        return ConservativeResult.intersectWith(ConstantRange(Min, Max+1));
       }
     }

@@ -2897,11 +2941,11 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) {
     APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
     ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones, TD);
     if (Ones == ~Zeros + 1)
-      return FullSet;
-    return ConstantRange(Ones, ~Zeros + 1);
+      return ConservativeResult;
+    return ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1));
   }

-  return FullSet;
+  return ConservativeResult;
 }
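Every range query now starts from a ConservativeResult seeded with GetMinTrailingZeros, and each case intersects its own estimate with it. The arithmetic for the unsigned side, illustrated with APInt for a value known to be a multiple of 8 at 32 bits (TZ = 3); illustrative only:

    unsigned BW = 32, TZ = 3;
    APInt UMax = APInt::getMaxValue(BW).lshr(TZ).shl(TZ);  // 0xFFFFFFF8
    // Half-open range covering 0 through 0xFFFFFFF8, the largest multiple
    // of 8 representable in 32 bits.
    ConstantRange R(APInt::getMinValue(BW), UMax + 1);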
+ uint32_t TZ = GetMinTrailingZeros(S); + if (TZ != 0) + ConservativeResult = + ConstantRange(APInt::getSignedMinValue(BitWidth), + APInt::getSignedMaxValue(BitWidth).ashr(TZ).shl(TZ) + 1); + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { ConstantRange X = getSignedRange(Add->getOperand(0)); for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i) X = X.add(getSignedRange(Add->getOperand(i))); - return X; + return ConservativeResult.intersectWith(X); } if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) { ConstantRange X = getSignedRange(Mul->getOperand(0)); for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i) X = X.multiply(getSignedRange(Mul->getOperand(i))); - return X; + return ConservativeResult.intersectWith(X); } if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) { ConstantRange X = getSignedRange(SMax->getOperand(0)); for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i) X = X.smax(getSignedRange(SMax->getOperand(i))); - return X; + return ConservativeResult.intersectWith(X); } if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) { ConstantRange X = getSignedRange(UMax->getOperand(0)); for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i) X = X.umax(getSignedRange(UMax->getOperand(i))); - return X; + return ConservativeResult.intersectWith(X); } if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) { ConstantRange X = getSignedRange(UDiv->getLHS()); ConstantRange Y = getSignedRange(UDiv->getRHS()); - return X.udiv(Y); + return ConservativeResult.intersectWith(X.udiv(Y)); } if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) { ConstantRange X = getSignedRange(ZExt->getOperand()); - return X.zeroExtend(cast<IntegerType>(ZExt->getType())->getBitWidth()); + return ConservativeResult.intersectWith(X.zeroExtend(BitWidth)); } if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) { ConstantRange X = getSignedRange(SExt->getOperand()); - return X.signExtend(cast<IntegerType>(SExt->getType())->getBitWidth()); + return ConservativeResult.intersectWith(X.signExtend(BitWidth)); } if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) { ConstantRange X = getSignedRange(Trunc->getOperand()); - return X.truncate(cast<IntegerType>(Trunc->getType())->getBitWidth()); + return ConservativeResult.intersectWith(X.truncate(BitWidth)); } - ConstantRange FullSet(getTypeSizeInBits(S->getType()), true); - if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) { - const SCEV *T = getBackedgeTakenCount(AddRec->getLoop()); - const SCEVConstant *Trip = dyn_cast<SCEVConstant>(T); - ConstantRange ConservativeResult = FullSet; - // If there's no signed wrap, and all the operands have the same sign or // zero, the value won't ever change sign. 
if (AddRec->hasNoSignedWrap()) { @@ -2977,20 +3026,22 @@ ScalarEvolution::getSignedRange(const SCEV *S) { if (!isKnownNonNegative(AddRec->getOperand(i))) AllNonNeg = false; if (!isKnownNonPositive(AddRec->getOperand(i))) AllNonPos = false; } - unsigned BitWidth = getTypeSizeInBits(AddRec->getType()); if (AllNonNeg) - ConservativeResult = ConstantRange(APInt(BitWidth, 0), - APInt::getSignedMinValue(BitWidth)); + ConservativeResult = ConservativeResult.intersectWith( + ConstantRange(APInt(BitWidth, 0), + APInt::getSignedMinValue(BitWidth))); else if (AllNonPos) - ConservativeResult = ConstantRange(APInt::getSignedMinValue(BitWidth), - APInt(BitWidth, 1)); + ConservativeResult = ConservativeResult.intersectWith( + ConstantRange(APInt::getSignedMinValue(BitWidth), + APInt(BitWidth, 1))); } // TODO: non-affine addrec - if (Trip && AddRec->isAffine()) { + if (AddRec->isAffine()) { const Type *Ty = AddRec->getType(); const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop()); - if (getTypeSizeInBits(MaxBECount->getType()) <= getTypeSizeInBits(Ty)) { + if (!isa<SCEVCouldNotCompute>(MaxBECount) && + getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) { MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty); const SCEV *Start = AddRec->getStart(); @@ -3008,7 +3059,7 @@ ScalarEvolution::getSignedRange(const SCEV *S) { EndRange.getSignedMax()); if (Min.isMinSignedValue() && Max.isMaxSignedValue()) return ConservativeResult; - return ConstantRange(Min, Max+1); + return ConservativeResult.intersectWith(ConstantRange(Min, Max+1)); } } @@ -3017,18 +3068,17 @@ ScalarEvolution::getSignedRange(const SCEV *S) { if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { // For a SCEVUnknown, ask ValueTracking. - unsigned BitWidth = getTypeSizeInBits(U->getType()); - if (!U->getValue()->getType()->isInteger() && !TD) - return FullSet; + if (!U->getValue()->getType()->isIntegerTy() && !TD) + return ConservativeResult; unsigned NS = ComputeNumSignBits(U->getValue(), TD); if (NS == 1) - return FullSet; - return + return ConservativeResult; + return ConservativeResult.intersectWith( ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1), - APInt::getSignedMaxValue(BitWidth).ashr(NS - 1)+1); + APInt::getSignedMaxValue(BitWidth).ashr(NS - 1)+1)); } - return FullSet; + return ConservativeResult; } /// createSCEV - We know that there is no SCEV for the specified value. @@ -3179,7 +3229,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { case Instruction::Shl: // Turn shift left of a constant amount into a multiply. if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) { - uint32_t BitWidth = cast<IntegerType>(V->getType())->getBitWidth(); + uint32_t BitWidth = cast<IntegerType>(U->getType())->getBitWidth(); Constant *X = ConstantInt::get(getContext(), APInt(BitWidth, 1).shl(SA->getLimitedValue(BitWidth))); return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X)); @@ -3189,7 +3239,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { case Instruction::LShr: // Turn logical shift right of a constant into a unsigned divide. 
if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) { - uint32_t BitWidth = cast<IntegerType>(V->getType())->getBitWidth(); + uint32_t BitWidth = cast<IntegerType>(U->getType())->getBitWidth(); Constant *X = ConstantInt::get(getContext(), APInt(BitWidth, 1).shl(SA->getLimitedValue(BitWidth))); return getUDivExpr(getSCEV(U->getOperand(0)), getSCEV(X)); @@ -3230,10 +3280,10 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { return getSCEV(U->getOperand(0)); break; - // It's tempting to handle inttoptr and ptrtoint, however this can - // lead to pointer expressions which cannot be expanded to GEPs - // (because they may overflow). For now, the only pointer-typed - // expressions we handle are GEPs and address literals. + // It's tempting to handle inttoptr and ptrtoint as no-ops, however this can + // lead to pointer expressions which cannot safely be expanded to GEPs, + // because ScalarEvolution doesn't respect the GEP aliasing rules when + // simplifying integer expressions. case Instruction::GetElementPtr: return createNodeForGEP(cast<GEPOperator>(U)); @@ -3350,19 +3400,19 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { std::pair<std::map<const Loop *, BackedgeTakenInfo>::iterator, bool> Pair = BackedgeTakenCounts.insert(std::make_pair(L, getCouldNotCompute())); if (Pair.second) { - BackedgeTakenInfo ItCount = ComputeBackedgeTakenCount(L); - if (ItCount.Exact != getCouldNotCompute()) { - assert(ItCount.Exact->isLoopInvariant(L) && - ItCount.Max->isLoopInvariant(L) && - "Computed trip count isn't loop invariant for loop!"); + BackedgeTakenInfo BECount = ComputeBackedgeTakenCount(L); + if (BECount.Exact != getCouldNotCompute()) { + assert(BECount.Exact->isLoopInvariant(L) && + BECount.Max->isLoopInvariant(L) && + "Computed backedge-taken count isn't loop invariant for loop!"); ++NumTripCountsComputed; // Update the value in the map. - Pair.first->second = ItCount; + Pair.first->second = BECount; } else { - if (ItCount.Max != getCouldNotCompute()) + if (BECount.Max != getCouldNotCompute()) // Update the value in the map. - Pair.first->second = ItCount; + Pair.first->second = BECount; if (isa<PHINode>(L->getHeader()->begin())) // Only count loops that have phi nodes as not being computable. ++NumTripCountsNotComputed; @@ -3373,7 +3423,7 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { // conservative estimates made without the benefit of trip count // information. This is similar to the code in forgetLoop, except that // it handles SCEVUnknown PHI nodes specially. - if (ItCount.hasAnyInfo()) { + if (BECount.hasAnyInfo()) { SmallVector<Instruction *, 16> Worklist; PushLoopPHIs(L, Worklist); @@ -4230,9 +4280,6 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { return getTruncateExpr(Op, Cast->getType()); } - if (isa<SCEVTargetDataConstant>(V)) - return V; - llvm_unreachable("Unknown SCEV type!"); return 0; } @@ -4403,7 +4450,7 @@ const SCEV *ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { -StartC->getValue()->getValue(), *this); } - } else if (AddRec->isQuadratic() && AddRec->getType()->isInteger()) { + } else if (AddRec->isQuadratic() && AddRec->getType()->isIntegerTy()) { // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of // the quadratic equation to solve it. 
    std::pair<const SCEV *,const SCEV *> Roots = SolveQuadraticEquation(AddRec,
@@ -4947,6 +4994,9 @@ const SCEV *ScalarEvolution::getBECount(const SCEV *Start,
                                        const SCEV *End,
                                        const SCEV *Step,
                                        bool NoWrap) {
+  assert(!isKnownNegative(Step) &&
+         "This code doesn't handle negative strides yet!");
+
   const Type *Ty = Start->getType();
   const SCEV *NegOne = getIntegerSCEV(-1, Ty);
   const SCEV *Diff = getMinusSCEV(End, Start);
@@ -4989,39 +5039,35 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
                       AddRec->hasNoUnsignedWrap();
 
     if (AddRec->isAffine()) {
-      // FORNOW: We only support unit strides.
       unsigned BitWidth = getTypeSizeInBits(AddRec->getType());
       const SCEV *Step = AddRec->getStepRecurrence(*this);
-      // TODO: handle non-constant strides.
-      const SCEVConstant *CStep = dyn_cast<SCEVConstant>(Step);
-      if (!CStep || CStep->isZero())
+      if (Step->isZero())
        return getCouldNotCompute();
-      if (CStep->isOne()) {
+      if (Step->isOne()) {
        // With unit stride, the iteration never steps past the limit value.
-      } else if (CStep->getValue()->getValue().isStrictlyPositive()) {
-        if (NoWrap) {
-          // We know the iteration won't step past the maximum value for its type.
-          ;
-        } else if (const SCEVConstant *CLimit = dyn_cast<SCEVConstant>(RHS)) {
-          // Test whether a positive iteration iteration can step past the limit
-          // value and past the maximum value for its type in a single step.
-          if (isSigned) {
-            APInt Max = APInt::getSignedMaxValue(BitWidth);
-            if ((Max - CStep->getValue()->getValue())
-                  .slt(CLimit->getValue()->getValue()))
-              return getCouldNotCompute();
-          } else {
-            APInt Max = APInt::getMaxValue(BitWidth);
-            if ((Max - CStep->getValue()->getValue())
-                  .ult(CLimit->getValue()->getValue()))
-              return getCouldNotCompute();
-          }
-        } else
-          // TODO: handle non-constant limit values below.
-          return getCouldNotCompute();
+      } else if (isKnownPositive(Step)) {
+        // Test whether a positive iteration can step past the limit
+        // value and past the maximum value for its type in a single step.
+        // Note that it's not sufficient to check NoWrap here, because even
+        // though the value after a wrap is undefined, it's not undefined
+        // behavior, so if wrap does occur, the loop could either terminate or
+        // loop infinitely, but in either case, the loop is guaranteed to
+        // iterate at least until the iteration where the wrapping occurs.
+        const SCEV *One = getIntegerSCEV(1, Step->getType());
+        if (isSigned) {
+          APInt Max = APInt::getSignedMaxValue(BitWidth);
+          if ((Max - getSignedRange(getMinusSCEV(Step, One)).getSignedMax())
+                .slt(getSignedRange(RHS).getSignedMax()))
+            return getCouldNotCompute();
+        } else {
+          APInt Max = APInt::getMaxValue(BitWidth);
+          if ((Max - getUnsignedRange(getMinusSCEV(Step, One)).getUnsignedMax())
+                .ult(getUnsignedRange(RHS).getUnsignedMax()))
+            return getCouldNotCompute();
+        }
       } else
-        // TODO: handle negative strides below.
+        // TODO: Handle negative strides here and below.
        return getCouldNotCompute();
 
       // We know the LHS is of the form {n,+,s} and the RHS is some loop-invariant
@@ -5054,6 +5100,20 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
                                getSignedRange(End).getSignedMax() :
                                getUnsignedRange(End).getUnsignedMax());
 
+      // If MaxEnd is within a step of the maximum integer value in its type,
+      // adjust it down to the minimum value which would produce the same effect.
+      // This allows the subsequent ceiling division of (N+(step-1))/step to
+      // compute the correct value.
+ const SCEV *StepMinusOne = getMinusSCEV(Step, + getIntegerSCEV(1, Step->getType())); + MaxEnd = isSigned ? + getSMinExpr(MaxEnd, + getMinusSCEV(getConstant(APInt::getSignedMaxValue(BitWidth)), + StepMinusOne)) : + getUMinExpr(MaxEnd, + getMinusSCEV(getConstant(APInt::getMaxValue(BitWidth)), + StepMinusOne)); + // Finally, we subtract these two values and divide, rounding up, to get // the number of times the backedge is executed. const SCEV *BECount = getBECount(Start, End, Step, NoWrap); diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index a72f58f..c2e1f89 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -365,31 +365,33 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, // the indices index into the element or field type selected by the // preceding index. for (;;) { - const SCEV *ElSize = SE.getAllocSizeExpr(ElTy); // If the scale size is not 0, attempt to factor out a scale for // array indexing. SmallVector<const SCEV *, 8> ScaledOps; - if (ElTy->isSized() && !ElSize->isZero()) { - SmallVector<const SCEV *, 8> NewOps; - for (unsigned i = 0, e = Ops.size(); i != e; ++i) { - const SCEV *Op = Ops[i]; - const SCEV *Remainder = SE.getIntegerSCEV(0, Ty); - if (FactorOutConstant(Op, Remainder, ElSize, SE, SE.TD)) { - // Op now has ElSize factored out. - ScaledOps.push_back(Op); - if (!Remainder->isZero()) - NewOps.push_back(Remainder); - AnyNonZeroIndices = true; - } else { - // The operand was not divisible, so add it to the list of operands - // we'll scan next iteration. - NewOps.push_back(Ops[i]); + if (ElTy->isSized()) { + const SCEV *ElSize = SE.getSizeOfExpr(ElTy); + if (!ElSize->isZero()) { + SmallVector<const SCEV *, 8> NewOps; + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + const SCEV *Op = Ops[i]; + const SCEV *Remainder = SE.getIntegerSCEV(0, Ty); + if (FactorOutConstant(Op, Remainder, ElSize, SE, SE.TD)) { + // Op now has ElSize factored out. + ScaledOps.push_back(Op); + if (!Remainder->isZero()) + NewOps.push_back(Remainder); + AnyNonZeroIndices = true; + } else { + // The operand was not divisible, so add it to the list of operands + // we'll scan next iteration. + NewOps.push_back(Ops[i]); + } + } + // If we made any changes, update Ops. + if (!ScaledOps.empty()) { + Ops = NewOps; + SimplifyAddOperands(Ops, Ty, SE); } - } - // If we made any changes, update Ops. - if (!ScaledOps.empty()) { - Ops = NewOps; - SimplifyAddOperands(Ops, Ty, SE); } } @@ -427,22 +429,22 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, } } } else { - // Without TargetData, just check for a SCEVFieldOffsetExpr of the + // Without TargetData, just check for an offsetof expression of the // appropriate struct type. 
for (unsigned i = 0, e = Ops.size(); i != e; ++i) - if (const SCEVFieldOffsetExpr *FO = - dyn_cast<SCEVFieldOffsetExpr>(Ops[i])) - if (FO->getStructType() == STy) { - unsigned FieldNo = FO->getFieldNo(); - GepIndices.push_back( - ConstantInt::get(Type::getInt32Ty(Ty->getContext()), - FieldNo)); - ElTy = STy->getTypeAtIndex(FieldNo); + if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Ops[i])) { + const Type *CTy; + Constant *FieldNo; + if (U->isOffsetOf(CTy, FieldNo) && CTy == STy) { + GepIndices.push_back(FieldNo); + ElTy = + STy->getTypeAtIndex(cast<ConstantInt>(FieldNo)->getZExtValue()); Ops[i] = SE.getConstant(Ty, 0); AnyNonZeroIndices = true; FoundFieldNo = true; break; } + } } // If no struct field offsets were found, tentatively assume that // field zero was selected (since the zero offset would obviously @@ -639,8 +641,52 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, // Reuse a previously-inserted PHI, if present. for (BasicBlock::iterator I = L->getHeader()->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I) - if (isInsertedInstruction(PN) && SE.getSCEV(PN) == Normalized) - return PN; + if (SE.isSCEVable(PN->getType()) && + (SE.getEffectiveSCEVType(PN->getType()) == + SE.getEffectiveSCEVType(Normalized->getType())) && + SE.getSCEV(PN) == Normalized) + if (BasicBlock *LatchBlock = L->getLoopLatch()) { + Instruction *IncV = + cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock)); + + // Determine if this is a well-behaved chain of instructions leading + // back to the PHI. It probably will be, if we're scanning an inner + // loop already visited by LSR for example, but it wouldn't have + // to be. + do { + if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV)) { + IncV = 0; + break; + } + IncV = dyn_cast<Instruction>(IncV->getOperand(0)); + if (!IncV) + break; + if (IncV->mayHaveSideEffects()) { + IncV = 0; + break; + } + } while (IncV != PN); + + if (IncV) { + // Ok, the add recurrence looks usable. + // Remember this PHI, even in post-inc mode. + InsertedValues.insert(PN); + // Remember the increment. + IncV = cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock)); + rememberInstruction(IncV); + if (L == IVIncInsertLoop) + do { + if (SE.DT->dominates(IncV, IVIncInsertPos)) + break; + // Make sure the increment is where we want it. But don't move it + // down past a potential existing post-inc user. + IncV->moveBefore(IVIncInsertPos); + IVIncInsertPos = IncV; + IncV = cast<Instruction>(IncV->getOperand(0)); + } while (IncV != PN); + return PN; + } + } // Save the original insertion point so we can restore it when we're done. BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); @@ -711,7 +757,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, // Restore the original insert point. if (SaveInsertBB) - Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt); + restoreInsertPoint(SaveInsertBB, SaveInsertPt); // Remember this PHI, even in post-inc mode. InsertedValues.insert(PN); @@ -774,6 +820,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { // Re-apply any non-loop-dominating scale. 
  if (PostLoopScale) {
+    Result = InsertNoopCastOfTo(Result, IntTy);
     Result = Builder.CreateMul(Result,
                                expandCodeFor(PostLoopScale, IntTy));
     rememberInstruction(Result);
@@ -785,6 +832,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
       const SCEV *const OffsetArray[1] = { PostLoopOffset };
       Result = expandAddToGEP(OffsetArray, OffsetArray+1, PTy, IntTy, Result);
     } else {
+      Result = InsertNoopCastOfTo(Result, IntTy);
       Result = Builder.CreateAdd(Result,
                                  expandCodeFor(PostLoopOffset, IntTy));
       rememberInstruction(Result);
@@ -825,7 +873,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
     while (isa<PHINode>(NewInsertPt)) ++NewInsertPt;
     V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0, NewInsertPt);
-    Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt);
+    restoreInsertPoint(SaveInsertBB, SaveInsertPt);
     return V;
   }
 
@@ -1001,14 +1049,6 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
   return LHS;
 }
 
-Value *SCEVExpander::visitFieldOffsetExpr(const SCEVFieldOffsetExpr *S) {
-  return ConstantExpr::getOffsetOf(S->getStructType(), S->getFieldNo());
-}
-
-Value *SCEVExpander::visitAllocSizeExpr(const SCEVAllocSizeExpr *S) {
-  return ConstantExpr::getSizeOf(S->getAllocType());
-}
-
 Value *SCEVExpander::expandCodeFor(const SCEV *SH, const Type *Ty) {
   // Expand the code for this SCEV.
   Value *V = expand(SH);
@@ -1059,10 +1099,32 @@ Value *SCEVExpander::expand(const SCEV *S) {
   if (!PostIncLoop)
     InsertedExpressions[std::make_pair(S, InsertPt)] = V;
 
-  Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt);
+  restoreInsertPoint(SaveInsertBB, SaveInsertPt);
   return V;
 }
 
+void SCEVExpander::rememberInstruction(Value *I) {
+  if (!PostIncLoop)
+    InsertedValues.insert(I);
+
+  // If we just claimed an existing instruction and that instruction had
+  // been the insert point, adjust the insert point forward so that
+  // subsequently inserted code will be dominated.
+  if (Builder.GetInsertPoint() == I) {
+    BasicBlock::iterator It = cast<Instruction>(I);
+    do { ++It; } while (isInsertedInstruction(It));
+    Builder.SetInsertPoint(Builder.GetInsertBlock(), It);
+  }
+}
+
+void SCEVExpander::restoreInsertPoint(BasicBlock *BB, BasicBlock::iterator I) {
+  // If we acquired more instructions since the old insert point was saved,
+  // advance past them.
+  while (isInsertedInstruction(I)) ++I;
+
+  Builder.SetInsertPoint(BB, I);
+}
+
 /// getOrInsertCanonicalInductionVariable - This method returns the
 /// canonical induction variable of the specified type for the specified
 /// loop (inserting one if there is none).  A canonical induction variable
@@ -1070,13 +1132,13 @@ Value *SCEVExpander::expand(const SCEV *S) {
 Value *
 SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
                                                     const Type *Ty) {
-  assert(Ty->isInteger() && "Can only insert integer induction variables!");
+  assert(Ty->isIntegerTy() && "Can only insert integer induction variables!");
   const SCEV *H = SE.getAddRecExpr(SE.getIntegerSCEV(0, Ty),
                                    SE.getIntegerSCEV(1, Ty), L);
   BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
   BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
   Value *V = expandCodeFor(H, 0, L->getHeader()->begin());
   if (SaveInsertBB)
-    Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt);
+    restoreInsertPoint(SaveInsertBB, SaveInsertPt);
   return V;
 }
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 91e5bc3..7cc9c0d 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -49,11 +49,11 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
   assert(V && "No Value?");
   assert(Depth <= MaxDepth && "Limit Search Depth");
   unsigned BitWidth = Mask.getBitWidth();
-  assert((V->getType()->isIntOrIntVector() || isa<PointerType>(V->getType())) &&
-         "Not integer or pointer type!");
+  assert((V->getType()->isIntOrIntVectorTy() || isa<PointerType>(V->getType()))
+         && "Not integer or pointer type!");
   assert((!TD ||
          TD->getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) &&
-         (!V->getType()->isIntOrIntVector() ||
+         (!V->getType()->isIntOrIntVectorTy() ||
          V->getType()->getScalarSizeInBits() == BitWidth) &&
         KnownZero.getBitWidth() == BitWidth &&
         KnownOne.getBitWidth() == BitWidth &&
@@ -269,7 +269,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
   }
   case Instruction::BitCast: {
     const Type *SrcTy = I->getOperand(0)->getType();
-    if ((SrcTy->isInteger() || isa<PointerType>(SrcTy)) &&
+    if ((SrcTy->isIntegerTy() || isa<PointerType>(SrcTy)) &&
         // TODO: For now, not handling conversions like:
         // (bitcast i64 %x to <2 x i32>)
         !isa<VectorType>(I->getType())) {
@@ -421,20 +421,29 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
   }
   case Instruction::SRem:
     if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
-      APInt RA = Rem->getValue();
-      if (RA.isPowerOf2() || (-RA).isPowerOf2()) {
-        APInt LowBits = RA.isStrictlyPositive() ? (RA - 1) : ~RA;
+      APInt RA = Rem->getValue().abs();
+      if (RA.isPowerOf2()) {
+        APInt LowBits = RA - 1;
         APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
         ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD,
                           Depth+1);
-        // If the sign bit of the first operand is zero, the sign bit of
-        // the result is zero. If the first operand has no one bits below
-        // the second operand's single 1 bit, its sign will be zero.
+        // The low bits of the first operand are unchanged by the srem.
+        KnownZero = KnownZero2 & LowBits;
+        KnownOne = KnownOne2 & LowBits;
+
+        // If the first operand is non-negative or has all low bits zero, then
+        // the upper bits are all zero.
         if (KnownZero2[BitWidth-1] || ((KnownZero2 & LowBits) == LowBits))
-          KnownZero2 |= ~LowBits;
+          KnownZero |= ~LowBits;
 
-        KnownZero |= KnownZero2 & Mask;
+        // If the first operand is negative and not all low bits are zero, then
+        // the upper bits are all one.
+ if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0)) + KnownOne |= ~LowBits; + + KnownZero &= Mask; + KnownOne &= Mask; assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); } @@ -640,7 +649,7 @@ bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask, /// unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD, unsigned Depth) { - assert((TD || V->getType()->isIntOrIntVector()) && + assert((TD || V->getType()->isIntOrIntVectorTy()) && "ComputeNumSignBits requires a TargetData object to operate " "on non-integer values!"); const Type *Ty = V->getType(); @@ -814,7 +823,7 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, assert(V && "No Value?"); assert(Depth <= MaxDepth && "Limit Search Depth"); - assert(V->getType()->isInteger() && "Not integer or pointer type!"); + assert(V->getType()->isIntegerTy() && "Not integer or pointer type!"); const Type *T = V->getType(); @@ -1363,7 +1372,7 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset, // Make sure the index-ee is a pointer to array of i8. const PointerType *PT = cast<PointerType>(GEP->getOperand(0)->getType()); const ArrayType *AT = dyn_cast<ArrayType>(PT->getElementType()); - if (AT == 0 || !AT->getElementType()->isInteger(8)) + if (AT == 0 || !AT->getElementType()->isIntegerTy(8)) return false; // Check to make sure that the first operand of the GEP is an integer and @@ -1402,7 +1411,7 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset, // Must be a Constant Array ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit); - if (Array == 0 || !Array->getType()->getElementType()->isInteger(8)) + if (Array == 0 || !Array->getType()->getElementType()->isIntegerTy(8)) return false; // Get the number of elements in the array diff --git a/lib/Archive/Archive.cpp b/lib/Archive/Archive.cpp index 00778d9..f4f8a43 100644 --- a/lib/Archive/Archive.cpp +++ b/lib/Archive/Archive.cpp @@ -14,7 +14,6 @@ #include "ArchiveInternals.h" #include "llvm/Bitcode/ReaderWriter.h" -#include "llvm/ModuleProvider.h" #include "llvm/Module.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/System/Process.h" @@ -173,8 +172,8 @@ void Archive::cleanUpMemory() { foreignST = 0; } - // Delete any ModuleProviders and ArchiveMember's we've allocated as a result - // of symbol table searches. + // Delete any Modules and ArchiveMember's we've allocated as a result of + // symbol table searches. for (ModuleMap::iterator I=modules.begin(), E=modules.end(); I != E; ++I ) { delete I->second.first; delete I->second.second; @@ -221,51 +220,37 @@ bool llvm::GetBitcodeSymbols(const sys::Path& fName, return true; } - ModuleProvider *MP = getBitcodeModuleProvider(Buffer.get(), Context, ErrMsg); - if (!MP) + Module *M = ParseBitcodeFile(Buffer.get(), Context, ErrMsg); + if (!M) return true; - // Get the module from the provider - Module* M = MP->materializeModule(); - if (M == 0) { - delete MP; - return true; - } - // Get the symbols getSymbols(M, symbols); // Done with the module. - delete MP; + delete M; return true; } -ModuleProvider* +Module* llvm::GetBitcodeSymbols(const unsigned char *BufPtr, unsigned Length, const std::string& ModuleID, LLVMContext& Context, std::vector<std::string>& symbols, std::string* ErrMsg) { - // Get the module provider - MemoryBuffer *Buffer =MemoryBuffer::getNewMemBuffer(Length, ModuleID.c_str()); + // Get the module. 
+ std::auto_ptr<MemoryBuffer> Buffer( + MemoryBuffer::getNewMemBuffer(Length, ModuleID.c_str())); memcpy((char*)Buffer->getBufferStart(), BufPtr, Length); - ModuleProvider *MP = getBitcodeModuleProvider(Buffer, Context, ErrMsg); - if (!MP) + Module *M = ParseBitcodeFile(Buffer.get(), Context, ErrMsg); + if (!M) return 0; - // Get the module from the provider - Module* M = MP->materializeModule(); - if (M == 0) { - delete MP; - return 0; - } - // Get the symbols getSymbols(M, symbols); - // Done with the module. Note that ModuleProvider will delete the - // Module when it is deleted. Also note that its the caller's responsibility - // to delete the ModuleProvider. - return MP; + // Done with the module. Note that it's the caller's responsibility to delete + // the Module. + return M; } diff --git a/lib/Archive/ArchiveInternals.h b/lib/Archive/ArchiveInternals.h index d187ed9..baea544 100644 --- a/lib/Archive/ArchiveInternals.h +++ b/lib/Archive/ArchiveInternals.h @@ -77,11 +77,11 @@ namespace llvm { std::vector<std::string>& symbols, std::string* ErrMsg); - ModuleProvider* GetBitcodeSymbols(const unsigned char*Buffer,unsigned Length, - const std::string& ModuleID, - LLVMContext& Context, - std::vector<std::string>& symbols, - std::string* ErrMsg); + Module* GetBitcodeSymbols(const unsigned char*Buffer,unsigned Length, + const std::string& ModuleID, + LLVMContext& Context, + std::vector<std::string>& symbols, + std::string* ErrMsg); } #endif diff --git a/lib/Archive/ArchiveReader.cpp b/lib/Archive/ArchiveReader.cpp index 74895d8..3ef15d2 100644 --- a/lib/Archive/ArchiveReader.cpp +++ b/lib/Archive/ArchiveReader.cpp @@ -120,7 +120,8 @@ Archive::parseMemberHeader(const char*& At, const char* End, std::string* error) if (Hdr->name[1] == '1' && Hdr->name[2] == '/') { if (isdigit(Hdr->name[3])) { unsigned len = atoi(&Hdr->name[3]); - pathname.assign(At, len); + const char *nulp = (const char *)memchr(At, '\0', len); + pathname.assign(At, nulp != 0 ? nulp - At : len); At += len; MemberSize -= len; flags |= ArchiveMember::HasLongFilenameFlag; @@ -452,9 +453,9 @@ Archive* Archive::OpenAndLoadSymbols(const sys::Path& file, return result.release(); } -// Look up one symbol in the symbol table and return a ModuleProvider for the -// module that defines that symbol. -ModuleProvider* +// Look up one symbol in the symbol table and return the module that defines +// that symbol. +Module* Archive::findModuleDefiningSymbol(const std::string& symbol, std::string* ErrMsg) { SymTabType::iterator SI = symTab.find(symbol); @@ -483,27 +484,27 @@ Archive::findModuleDefiningSymbol(const std::string& symbol, if (!mbr) return 0; - // Now, load the bitcode module to get the ModuleProvider + // Now, load the bitcode module to get the Module. std::string FullMemberName = archPath.str() + "(" + mbr->getPath().str() + ")"; MemoryBuffer *Buffer =MemoryBuffer::getNewMemBuffer(mbr->getSize(), FullMemberName.c_str()); memcpy((char*)Buffer->getBufferStart(), mbr->getData(), mbr->getSize()); - ModuleProvider *mp = getBitcodeModuleProvider(Buffer, Context, ErrMsg); - if (!mp) + Module *m = getLazyBitcodeModule(Buffer, Context, ErrMsg); + if (!m) return 0; - modules.insert(std::make_pair(fileOffset, std::make_pair(mp, mbr))); + modules.insert(std::make_pair(fileOffset, std::make_pair(m, mbr))); - return mp; + return m; } // Look up multiple symbols in the symbol table and return a set of -// ModuleProviders that define those symbols. +// Modules that define those symbols. 
bool Archive::findModulesDefiningSymbols(std::set<std::string>& symbols, - std::set<ModuleProvider*>& result, + std::set<Module*>& result, std::string* error) { if (!mapfile || !base) { if (error) @@ -536,19 +537,19 @@ Archive::findModulesDefiningSymbols(std::set<std::string>& symbols, std::vector<std::string> symbols; std::string FullMemberName = archPath.str() + "(" + mbr->getPath().str() + ")"; - ModuleProvider* MP = + Module* M = GetBitcodeSymbols((const unsigned char*)At, mbr->getSize(), FullMemberName, Context, symbols, error); - if (MP) { + if (M) { // Insert the module's symbols into the symbol table for (std::vector<std::string>::iterator I = symbols.begin(), E=symbols.end(); I != E; ++I ) { symTab.insert(std::make_pair(*I, offset)); } - // Insert the ModuleProvider and the ArchiveMember into the table of + // Insert the Module and the ArchiveMember into the table of // modules. - modules.insert(std::make_pair(offset, std::make_pair(MP, mbr))); + modules.insert(std::make_pair(offset, std::make_pair(M, mbr))); } else { if (error) *error = "Can't parse bitcode member: " + @@ -571,11 +572,11 @@ Archive::findModulesDefiningSymbols(std::set<std::string>& symbols, for (std::set<std::string>::iterator I=symbols.begin(), E=symbols.end(); I != E;) { // See if this symbol exists - ModuleProvider* mp = findModuleDefiningSymbol(*I,error); - if (mp) { - // The symbol exists, insert the ModuleProvider into our result, - // duplicates wil be ignored - result.insert(mp); + Module* m = findModuleDefiningSymbol(*I,error); + if (m) { + // The symbol exists, insert the Module into our result, duplicates will + // be ignored. + result.insert(m); // Remove the symbol now that its been resolved, being careful to // post-increment the iterator. diff --git a/lib/Archive/ArchiveWriter.cpp b/lib/Archive/ArchiveWriter.cpp index d17f6b5..58fbbf4 100644 --- a/lib/Archive/ArchiveWriter.cpp +++ b/lib/Archive/ArchiveWriter.cpp @@ -12,12 +12,12 @@ //===----------------------------------------------------------------------===// #include "ArchiveInternals.h" -#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/Module.h" #include "llvm/ADT/OwningPtr.h" +#include "llvm/Bitcode/ReaderWriter.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/System/Signals.h" #include "llvm/System/Process.h" -#include "llvm/ModuleProvider.h" +#include "llvm/System/Signals.h" #include <fstream> #include <ostream> #include <iomanip> @@ -225,12 +225,12 @@ Archive::writeMember( std::vector<std::string> symbols; std::string FullMemberName = archPath.str() + "(" + member.getPath().str() + ")"; - ModuleProvider* MP = + Module* M = GetBitcodeSymbols((const unsigned char*)data,fSize, FullMemberName, Context, symbols, ErrMsg); // If the bitcode parsed successfully - if ( MP ) { + if ( M ) { for (std::vector<std::string>::iterator SI = symbols.begin(), SE = symbols.end(); SI != SE; ++SI) { @@ -244,7 +244,7 @@ Archive::writeMember( } } // We don't need this module any more. - delete MP; + delete M; } else { delete mFile; if (ErrMsg) diff --git a/lib/Archive/Makefile b/lib/Archive/Makefile index 1256e1c..da97804 100644 --- a/lib/Archive/Makefile +++ b/lib/Archive/Makefile @@ -9,7 +9,6 @@ LEVEL = ../.. LIBRARYNAME = LLVMArchive -CXXFLAGS = -fno-rtti # We only want an archive so only those modules actually used by a tool are # included. 
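[Editorial note] The ArchiveReader.cpp hunk above also quietly hardens long-filename parsing: instead of blindly copying all `len` bytes of the name field, it now stops at the first NUL. A minimal standalone sketch of that idiom follows; the helper name is illustrative, not part of the patch:

  #include <cstring>
  #include <string>

  // The ar long-name region is fixed-width and may be NUL-padded; copying all
  // `len` bytes would embed trailing '\0' characters in the resulting path.
  // Clamping at the first NUL matches the pathname.assign() fix above.
  static std::string readLongName(const char *At, unsigned len) {
    const char *nulp = (const char *)memchr(At, '\0', len);
    return std::string(At, nulp != 0 ? size_t(nulp - At) : size_t(len));
  }
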
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 2a926d2..46f3cbc 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -558,6 +558,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(readnone); KEYWORD(readonly); + KEYWORD(inlinehint); KEYWORD(noinline); KEYWORD(alwaysinline); KEYWORD(optsize); @@ -569,6 +570,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(type); KEYWORD(opaque); + KEYWORD(union); KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle); KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 04a5263c..9cae0d2 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -947,6 +947,7 @@ bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) { case lltok::kw_noinline: Attrs |= Attribute::NoInline; break; case lltok::kw_readnone: Attrs |= Attribute::ReadNone; break; case lltok::kw_readonly: Attrs |= Attribute::ReadOnly; break; + case lltok::kw_inlinehint: Attrs |= Attribute::InlineHint; break; case lltok::kw_alwaysinline: Attrs |= Attribute::AlwaysInline; break; case lltok::kw_optsize: Attrs |= Attribute::OptimizeForSize; break; case lltok::kw_ssp: Attrs |= Attribute::StackProtect; break; @@ -955,6 +956,14 @@ bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) { case lltok::kw_noimplicitfloat: Attrs |= Attribute::NoImplicitFloat; break; case lltok::kw_naked: Attrs |= Attribute::Naked; break; + case lltok::kw_alignstack: { + unsigned Alignment; + if (ParseOptionalStackAlignment(Alignment)) + return true; + Attrs |= Attribute::constructStackAlignmentFromInt(Alignment); + continue; + } + case lltok::kw_align: { unsigned Alignment; if (ParseOptionalAlignment(Alignment)) @@ -962,6 +971,7 @@ bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) { Attrs |= Attribute::constructAlignmentFromInt(Alignment); continue; } + } Lex.Lex(); } @@ -1130,6 +1140,25 @@ bool LLParser::ParseOptionalCommaAlign(unsigned &Alignment, return false; } +/// ParseOptionalStackAlignment +/// ::= /* empty */ +/// ::= 'alignstack' '(' 4 ')' +bool LLParser::ParseOptionalStackAlignment(unsigned &Alignment) { + Alignment = 0; + if (!EatIfPresent(lltok::kw_alignstack)) + return false; + LocTy ParenLoc = Lex.getLoc(); + if (!EatIfPresent(lltok::lparen)) + return Error(ParenLoc, "expected '('"); + LocTy AlignLoc = Lex.getLoc(); + if (ParseUInt32(Alignment)) return true; + ParenLoc = Lex.getLoc(); + if (!EatIfPresent(lltok::rparen)) + return Error(ParenLoc, "expected ')'"); + if (!isPowerOf2_32(Alignment)) + return Error(AlignLoc, "stack alignment is not a power of two"); + return false; +} /// ParseIndexList - This parses the index list for an insert/extractvalue /// instruction. This sets AteExtraComma in the case where we eat an extra @@ -1266,6 +1295,11 @@ bool LLParser::ParseTypeRec(PATypeHolder &Result) { if (ParseStructType(Result, false)) return true; break; + case lltok::kw_union: + // TypeRec ::= 'union' '{' ... '}' + if (ParseUnionType(Result)) + return true; + break; case lltok::lsquare: // TypeRec ::= '[' ... ']' Lex.Lex(); // eat the lsquare. 
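[Editorial note] The `union` keyword added to the lexer above feeds the ParseUnionType production in the next hunk ('union' '{' TypeRec (',' TypeRec)* '}'). A rough sketch of what that production ultimately builds for `union { i32, float }`, calling UnionType::get exactly as this patch does; the driver function and header paths are assumptions for illustration:

  #include "llvm/DerivedTypes.h"
  #include "llvm/LLVMContext.h"
  using namespace llvm;

  // Build the type for "union { i32, float }" the way ParseUnionType does:
  // collect the element types, then hand them to UnionType::get.
  const Type *buildExampleUnion(LLVMContext &Context) {
    const Type *Elts[] = { Type::getInt32Ty(Context),
                           Type::getFloatTy(Context) };
    return UnionType::get(Elts, 2);
  }
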
@@ -1575,6 +1609,38 @@ bool LLParser::ParseStructType(PATypeHolder &Result, bool Packed) { return false; } +/// ParseUnionType +/// TypeRec +/// ::= 'union' '{' TypeRec (',' TypeRec)* '}' +bool LLParser::ParseUnionType(PATypeHolder &Result) { + assert(Lex.getKind() == lltok::kw_union); + Lex.Lex(); // Consume the 'union' + + if (ParseToken(lltok::lbrace, "'{' expected after 'union'")) return true; + + SmallVector<PATypeHolder, 8> ParamsList; + do { + LocTy EltTyLoc = Lex.getLoc(); + if (ParseTypeRec(Result)) return true; + ParamsList.push_back(Result); + + if (Result->isVoidTy()) + return Error(EltTyLoc, "union element can not have void type"); + if (!UnionType::isValidElementType(Result)) + return Error(EltTyLoc, "invalid element type for union"); + + } while (EatIfPresent(lltok::comma)) ; + + if (ParseToken(lltok::rbrace, "expected '}' at end of union")) + return true; + + SmallVector<const Type*, 8> ParamsListTy; + for (unsigned i = 0, e = ParamsList.size(); i != e; ++i) + ParamsListTy.push_back(ParamsList[i].get()); + Result = HandleUpRefs(UnionType::get(&ParamsListTy[0], ParamsListTy.size())); + return false; +} + /// ParseArrayVectorType - Parse an array or vector type, assuming the first /// token has already been consumed. /// TypeRec @@ -1991,8 +2057,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { if (Elts.empty()) return Error(ID.Loc, "constant vector must not be empty"); - if (!Elts[0]->getType()->isInteger() && - !Elts[0]->getType()->isFloatingPoint()) + if (!Elts[0]->getType()->isIntegerTy() && + !Elts[0]->getType()->isFloatingPointTy()) return Error(FirstEltLoc, "vector elements must have integer or floating point type"); @@ -2134,8 +2200,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { ParseToken(lltok::rparen, "expected ')' in extractvalue constantexpr")) return true; - if (!isa<StructType>(Val->getType()) && !isa<ArrayType>(Val->getType())) - return Error(ID.Loc, "extractvalue operand must be array or struct"); + if (!Val->getType()->isAggregateType()) + return Error(ID.Loc, "extractvalue operand must be aggregate type"); if (!ExtractValueInst::getIndexedType(Val->getType(), Indices.begin(), Indices.end())) return Error(ID.Loc, "invalid indices for extractvalue"); @@ -2155,8 +2221,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { ParseIndexList(Indices) || ParseToken(lltok::rparen, "expected ')' in insertvalue constantexpr")) return true; - if (!isa<StructType>(Val0->getType()) && !isa<ArrayType>(Val0->getType())) - return Error(ID.Loc, "extractvalue operand must be array or struct"); + if (!Val0->getType()->isAggregateType()) + return Error(ID.Loc, "insertvalue operand must be aggregate type"); if (!ExtractValueInst::getIndexedType(Val0->getType(), Indices.begin(), Indices.end())) return Error(ID.Loc, "invalid indices for insertvalue"); @@ -2184,12 +2250,12 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { CmpInst::Predicate Pred = (CmpInst::Predicate)PredVal; if (Opc == Instruction::FCmp) { - if (!Val0->getType()->isFPOrFPVector()) + if (!Val0->getType()->isFPOrFPVectorTy()) return Error(ID.Loc, "fcmp requires floating point operands"); ID.ConstantVal = ConstantExpr::getFCmp(Pred, Val0, Val1); } else { assert(Opc == Instruction::ICmp && "Unexpected opcode for CmpInst!"); - if (!Val0->getType()->isIntOrIntVector() && + if (!Val0->getType()->isIntOrIntVectorTy() && !isa<PointerType>(Val0->getType())) return Error(ID.Loc, "icmp requires pointer or integer operands"); ID.ConstantVal = 
ConstantExpr::getICmp(Pred, Val0, Val1); @@ -2240,7 +2306,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { return true; if (Val0->getType() != Val1->getType()) return Error(ID.Loc, "operands of constexpr must have same type"); - if (!Val0->getType()->isIntOrIntVector()) { + if (!Val0->getType()->isIntOrIntVectorTy()) { if (NUW) return Error(ModifierLoc, "nuw only applies to integer operations"); if (NSW) @@ -2248,8 +2314,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { } // API compatibility: Accept either integer or floating-point types with // add, sub, and mul. - if (!Val0->getType()->isIntOrIntVector() && - !Val0->getType()->isFPOrFPVector()) + if (!Val0->getType()->isIntOrIntVectorTy() && + !Val0->getType()->isFPOrFPVectorTy()) return Error(ID.Loc,"constexpr requires integer, fp, or vector operands"); unsigned Flags = 0; if (NUW) Flags |= OverflowingBinaryOperator::NoUnsignedWrap; @@ -2279,7 +2345,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { return true; if (Val0->getType() != Val1->getType()) return Error(ID.Loc, "operands of constexpr must have same type"); - if (!Val0->getType()->isIntOrIntVector()) + if (!Val0->getType()->isIntOrIntVectorTy()) return Error(ID.Loc, "constexpr requires integer or integer vector operands"); ID.ConstantVal = ConstantExpr::get(Opc, Val0, Val1); @@ -2449,7 +2515,7 @@ bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V, V = ConstantInt::get(Context, ID.APSIntVal); return false; case ValID::t_APFloat: - if (!Ty->isFloatingPoint() || + if (!Ty->isFloatingPointTy() || !ConstantFP::isValueValidForType(Ty, ID.APFloatVal)) return Error(ID.Loc, "floating point constant invalid for type"); @@ -2492,8 +2558,17 @@ bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V, V = Constant::getNullValue(Ty); return false; case ValID::t_Constant: - if (ID.ConstantVal->getType() != Ty) + if (ID.ConstantVal->getType() != Ty) { + // Allow a constant struct with a single member to be converted + // to a union, if the union has a member which is the same type + // as the struct member. 
+ if (const UnionType* utype = dyn_cast<UnionType>(Ty)) { + return ParseUnionValue(utype, ID, V); + } + return Error(ID.Loc, "constant expression type mismatch"); + } + V = ID.ConstantVal; return false; } @@ -2523,6 +2598,22 @@ bool LLParser::ParseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc, return false; } +bool LLParser::ParseUnionValue(const UnionType* utype, ValID &ID, Value *&V) { + if (const StructType* stype = dyn_cast<StructType>(ID.ConstantVal->getType())) { + if (stype->getNumContainedTypes() != 1) + return Error(ID.Loc, "constant expression type mismatch"); + int index = utype->getElementTypeIndex(stype->getContainedType(0)); + if (index < 0) + return Error(ID.Loc, "initializer type is not a member of the union"); + + V = ConstantUnion::get( + utype, cast<Constant>(ID.ConstantVal->getOperand(0))); + return false; + } + + return Error(ID.Loc, "constant expression type mismatch"); +} + /// FunctionHeader /// ::= OptionalLinkage OptionalVisibility OptionalCallingConv OptRetAttrs @@ -2566,7 +2657,6 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { return Error(LinkageLoc, "invalid linkage for function declaration"); break; case GlobalValue::AppendingLinkage: - case GlobalValue::GhostLinkage: case GlobalValue::CommonLinkage: return Error(LinkageLoc, "invalid function linkage type"); } @@ -2873,7 +2963,7 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB, // API compatibility: Accept either integer or floating-point types. bool Result = ParseArithmetic(Inst, PFS, KeywordVal, 0); if (!Result) { - if (!Inst->getType()->isIntOrIntVector()) { + if (!Inst->getType()->isIntOrIntVectorTy()) { if (NUW) return Error(ModifierLoc, "nuw only applies to integer operations"); if (NSW) @@ -3292,11 +3382,11 @@ bool LLParser::ParseArithmetic(Instruction *&Inst, PerFunctionState &PFS, switch (OperandType) { default: llvm_unreachable("Unknown operand type!"); case 0: // int or FP. 
- Valid = LHS->getType()->isIntOrIntVector() || - LHS->getType()->isFPOrFPVector(); + Valid = LHS->getType()->isIntOrIntVectorTy() || + LHS->getType()->isFPOrFPVectorTy(); break; - case 1: Valid = LHS->getType()->isIntOrIntVector(); break; - case 2: Valid = LHS->getType()->isFPOrFPVector(); break; + case 1: Valid = LHS->getType()->isIntOrIntVectorTy(); break; + case 2: Valid = LHS->getType()->isFPOrFPVectorTy(); break; } if (!Valid) @@ -3316,7 +3406,7 @@ bool LLParser::ParseLogical(Instruction *&Inst, PerFunctionState &PFS, ParseValue(LHS->getType(), RHS, PFS)) return true; - if (!LHS->getType()->isIntOrIntVector()) + if (!LHS->getType()->isIntOrIntVectorTy()) return Error(Loc,"instruction requires integer or integer vector operands"); Inst = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS); @@ -3340,12 +3430,12 @@ bool LLParser::ParseCompare(Instruction *&Inst, PerFunctionState &PFS, return true; if (Opc == Instruction::FCmp) { - if (!LHS->getType()->isFPOrFPVector()) + if (!LHS->getType()->isFPOrFPVectorTy()) return Error(Loc, "fcmp requires floating point operands"); Inst = new FCmpInst(CmpInst::Predicate(Pred), LHS, RHS); } else { assert(Opc == Instruction::ICmp && "Unknown opcode for CmpInst!"); - if (!LHS->getType()->isIntOrIntVector() && + if (!LHS->getType()->isIntOrIntVectorTy() && !isa<PointerType>(LHS->getType())) return Error(Loc, "icmp requires integer operands"); Inst = new ICmpInst(CmpInst::Predicate(Pred), LHS, RHS); @@ -3643,7 +3733,7 @@ int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS, } } - if (Size && !Size->getType()->isInteger(32)) + if (Size && !Size->getType()->isIntegerTy(32)) return Error(SizeLoc, "element count must be i32"); if (isAlloca) { @@ -3783,8 +3873,8 @@ int LLParser::ParseExtractValue(Instruction *&Inst, PerFunctionState &PFS) { ParseIndexList(Indices, AteExtraComma)) return true; - if (!isa<StructType>(Val->getType()) && !isa<ArrayType>(Val->getType())) - return Error(Loc, "extractvalue operand must be array or struct"); + if (!Val->getType()->isAggregateType()) + return Error(Loc, "extractvalue operand must be aggregate type"); if (!ExtractValueInst::getIndexedType(Val->getType(), Indices.begin(), Indices.end())) @@ -3805,8 +3895,8 @@ int LLParser::ParseInsertValue(Instruction *&Inst, PerFunctionState &PFS) { ParseIndexList(Indices, AteExtraComma)) return true; - if (!isa<StructType>(Val0->getType()) && !isa<ArrayType>(Val0->getType())) - return Error(Loc0, "extractvalue operand must be array or struct"); + if (!Val0->getType()->isAggregateType()) + return Error(Loc0, "insertvalue operand must be aggregate type"); if (!ExtractValueInst::getIndexedType(Val0->getType(), Indices.begin(), Indices.end())) diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h index 85c07ff..9abe404 100644 --- a/lib/AsmParser/LLParser.h +++ b/lib/AsmParser/LLParser.h @@ -31,6 +31,7 @@ namespace llvm { class GlobalValue; class MDString; class MDNode; + class UnionType; /// ValID - Represents a reference of a definition of some sort with no type. 
/// There are several cases where we have to parse the value but where the @@ -169,6 +170,7 @@ namespace llvm { bool ParseOptionalVisibility(unsigned &Visibility); bool ParseOptionalCallingConv(CallingConv::ID &CC); bool ParseOptionalAlignment(unsigned &Alignment); + bool ParseOptionalStackAlignment(unsigned &Alignment); bool ParseInstructionMetadata(SmallVectorImpl<std::pair<unsigned, MDNode *> > &); bool ParseOptionalCommaAlign(unsigned &Alignment, bool &AteExtraComma); @@ -211,6 +213,7 @@ namespace llvm { } bool ParseTypeRec(PATypeHolder &H); bool ParseStructType(PATypeHolder &H, bool Packed); + bool ParseUnionType(PATypeHolder &H); bool ParseArrayVectorType(PATypeHolder &H, bool isVector); bool ParseFunctionType(PATypeHolder &Result); PATypeHolder HandleUpRefs(const Type *Ty); @@ -279,6 +282,8 @@ namespace llvm { return ParseTypeAndBasicBlock(BB, Loc, PFS); } + bool ParseUnionValue(const UnionType* utype, ValID &ID, Value *&V); + struct ParamInfo { LocTy Loc; Value *V; diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index 80eb194..3ac9169 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -85,6 +85,7 @@ namespace lltok { kw_readnone, kw_readonly, + kw_inlinehint, kw_noinline, kw_alwaysinline, kw_optsize, @@ -96,6 +97,7 @@ namespace lltok { kw_type, kw_opaque, + kw_union, kw_eq, kw_ne, kw_slt, kw_sgt, kw_sle, kw_sge, kw_ult, kw_ugt, kw_ule, kw_uge, kw_oeq, kw_one, kw_olt, kw_ogt, kw_ole, kw_oge, kw_ord, kw_uno, diff --git a/lib/AsmParser/Makefile b/lib/AsmParser/Makefile index 7b53a87..995bb0e 100644 --- a/lib/AsmParser/Makefile +++ b/lib/AsmParser/Makefile @@ -10,6 +10,5 @@ LEVEL = ../.. LIBRARYNAME := LLVMAsmParser BUILD_ARCHIVE = 1 -CXXFLAGS = -fno-rtti include $(LEVEL)/Makefile.common diff --git a/lib/Bitcode/Reader/BitReader.cpp b/lib/Bitcode/Reader/BitReader.cpp index 32b97e8..7537435 100644 --- a/lib/Bitcode/Reader/BitReader.cpp +++ b/lib/Bitcode/Reader/BitReader.cpp @@ -21,17 +21,8 @@ using namespace llvm; Optionally returns a human-readable error message via OutMessage. 
*/ LLVMBool LLVMParseBitcode(LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutModule, char **OutMessage) { - std::string Message; - - *OutModule = wrap(ParseBitcodeFile(unwrap(MemBuf), getGlobalContext(), - &Message)); - if (!*OutModule) { - if (OutMessage) - *OutMessage = strdup(Message.c_str()); - return 1; - } - - return 0; + return LLVMParseBitcodeInContext(wrap(&getGlobalContext()), MemBuf, OutModule, + OutMessage); } LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef, @@ -57,18 +48,8 @@ LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef, LLVMBool LLVMGetBitcodeModuleProvider(LLVMMemoryBufferRef MemBuf, LLVMModuleProviderRef *OutMP, char **OutMessage) { - std::string Message; - - *OutMP = wrap(getBitcodeModuleProvider(unwrap(MemBuf), getGlobalContext(), - &Message)); - - if (!*OutMP) { - if (OutMessage) - *OutMessage = strdup(Message.c_str()); - return 1; - } - - return 0; + return LLVMGetBitcodeModuleProviderInContext(wrap(&getGlobalContext()), + MemBuf, OutMP, OutMessage); } LLVMBool LLVMGetBitcodeModuleProviderInContext(LLVMContextRef ContextRef, @@ -77,8 +58,8 @@ LLVMBool LLVMGetBitcodeModuleProviderInContext(LLVMContextRef ContextRef, char **OutMessage) { std::string Message; - *OutMP = wrap(getBitcodeModuleProvider(unwrap(MemBuf), *unwrap(ContextRef), - &Message)); + *OutMP = reinterpret_cast<LLVMModuleProviderRef>( + getLazyBitcodeModule(unwrap(MemBuf), *unwrap(ContextRef), &Message)); if (!*OutMP) { if (OutMessage) *OutMessage = strdup(Message.c_str()); diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 2549a51..cebfbf6 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -28,7 +28,8 @@ using namespace llvm; void BitcodeReader::FreeState() { - delete Buffer; + if (BufferOwned) + delete Buffer; Buffer = 0; std::vector<PATypeHolder>().swap(TypeList); ValueList.clear(); @@ -107,17 +108,17 @@ static int GetDecodedBinaryOpcode(unsigned Val, const Type *Ty) { switch (Val) { default: return -1; case bitc::BINOP_ADD: - return Ty->isFPOrFPVector() ? Instruction::FAdd : Instruction::Add; + return Ty->isFPOrFPVectorTy() ? Instruction::FAdd : Instruction::Add; case bitc::BINOP_SUB: - return Ty->isFPOrFPVector() ? Instruction::FSub : Instruction::Sub; + return Ty->isFPOrFPVectorTy() ? Instruction::FSub : Instruction::Sub; case bitc::BINOP_MUL: - return Ty->isFPOrFPVector() ? Instruction::FMul : Instruction::Mul; + return Ty->isFPOrFPVectorTy() ? Instruction::FMul : Instruction::Mul; case bitc::BINOP_UDIV: return Instruction::UDiv; case bitc::BINOP_SDIV: - return Ty->isFPOrFPVector() ? Instruction::FDiv : Instruction::SDiv; + return Ty->isFPOrFPVectorTy() ? Instruction::FDiv : Instruction::SDiv; case bitc::BINOP_UREM: return Instruction::URem; case bitc::BINOP_SREM: - return Ty->isFPOrFPVector() ? Instruction::FRem : Instruction::SRem; + return Ty->isFPOrFPVectorTy() ? 
Instruction::FRem : Instruction::SRem; case bitc::BINOP_SHL: return Instruction::Shl; case bitc::BINOP_LSHR: return Instruction::LShr; case bitc::BINOP_ASHR: return Instruction::AShr; @@ -584,6 +585,13 @@ bool BitcodeReader::ParseTypeTable() { ResultTy = StructType::get(Context, EltTys, Record[0]); break; } + case bitc::TYPE_CODE_UNION: { // UNION: [eltty x N] + SmallVector<const Type*, 8> EltTys; + for (unsigned i = 0, e = Record.size(); i != e; ++i) + EltTys.push_back(getTypeByID(Record[i], true)); + ResultTy = UnionType::get(&EltTys[0], EltTys.size()); + break; + } case bitc::TYPE_CODE_ARRAY: // ARRAY: [numelts, eltty] if (Record.size() < 2) return Error("Invalid ARRAY type record"); @@ -1167,7 +1175,7 @@ bool BitcodeReader::ParseConstants() { Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy); Constant *Op1 = ValueList.getConstantFwdRef(Record[2], OpTy); - if (OpTy->isFPOrFPVector()) + if (OpTy->isFPOrFPVectorTy()) V = ConstantExpr::getFCmp(Record[3], Op0, Op1); else V = ConstantExpr::getICmp(Record[3], Op0, Op1); @@ -1241,11 +1249,7 @@ bool BitcodeReader::RememberAndSkipFunctionBody() { // Save the current stream state. uint64_t CurBit = Stream.GetCurrentBitNo(); - DeferredFunctionInfo[Fn] = std::make_pair(CurBit, Fn->getLinkage()); - - // Set the functions linkage to GhostLinkage so we know it is lazily - // deserialized. - Fn->setLinkage(GlobalValue::GhostLinkage); + DeferredFunctionInfo[Fn] = CurBit; // Skip over the function block for now. if (Stream.SkipBlock()) @@ -1253,17 +1257,10 @@ bool BitcodeReader::RememberAndSkipFunctionBody() { return false; } -bool BitcodeReader::ParseModule(const std::string &ModuleID) { - // Reject multiple MODULE_BLOCK's in a single bitstream. - if (TheModule) - return Error("Multiple MODULE_BLOCKs in same stream"); - +bool BitcodeReader::ParseModule() { if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) return Error("Malformed block record"); - // Otherwise, create the module. - TheModule = new Module(ModuleID, Context); - SmallVector<uint64_t, 64> Record; std::vector<std::string> SectionTable; std::vector<std::string> GCTable; @@ -1520,7 +1517,7 @@ bool BitcodeReader::ParseModule(const std::string &ModuleID) { return Error("Premature end of bitstream"); } -bool BitcodeReader::ParseBitcode() { +bool BitcodeReader::ParseBitcodeInto(Module *M) { TheModule = 0; if (Buffer->getBufferSize() & 3) @@ -1564,7 +1561,11 @@ bool BitcodeReader::ParseBitcode() { return Error("Malformed BlockInfoBlock"); break; case bitc::MODULE_BLOCK_ID: - if (ParseModule(Buffer->getBufferIdentifier())) + // Reject multiple MODULE_BLOCK's in a single bitstream. 
+ if (TheModule) + return Error("Multiple MODULE_BLOCKs in same stream"); + TheModule = M; + if (ParseModule()) return true; break; default: @@ -1702,12 +1703,12 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { if (Opc == Instruction::Add || Opc == Instruction::Sub || Opc == Instruction::Mul) { - if (Record[3] & (1 << bitc::OBO_NO_SIGNED_WRAP)) + if (Record[OpNum] & (1 << bitc::OBO_NO_SIGNED_WRAP)) cast<BinaryOperator>(I)->setHasNoSignedWrap(true); - if (Record[3] & (1 << bitc::OBO_NO_UNSIGNED_WRAP)) + if (Record[OpNum] & (1 << bitc::OBO_NO_UNSIGNED_WRAP)) cast<BinaryOperator>(I)->setHasNoUnsignedWrap(true); } else if (Opc == Instruction::SDiv) { - if (Record[3] & (1 << bitc::SDIV_EXACT)) + if (Record[OpNum] & (1 << bitc::SDIV_EXACT)) cast<BinaryOperator>(I)->setIsExact(true); } } @@ -1891,7 +1892,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { OpNum+1 != Record.size()) return Error("Invalid CMP record"); - if (LHS->getType()->isFPOrFPVector()) + if (LHS->getType()->isFPOrFPVectorTy()) I = new FCmpInst((FCmpInst::Predicate)Record[OpNum], LHS, RHS); else I = new ICmpInst((ICmpInst::Predicate)Record[OpNum], LHS, RHS); @@ -2299,22 +2300,28 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { } //===----------------------------------------------------------------------===// -// ModuleProvider implementation +// GVMaterializer implementation //===----------------------------------------------------------------------===// -bool BitcodeReader::materializeFunction(Function *F, std::string *ErrInfo) { - // If it already is material, ignore the request. - if (!F->hasNotBeenReadFromBitcode()) return false; +bool BitcodeReader::isMaterializable(const GlobalValue *GV) const { + if (const Function *F = dyn_cast<Function>(GV)) { + return F->isDeclaration() && + DeferredFunctionInfo.count(const_cast<Function*>(F)); + } + return false; +} - DenseMap<Function*, std::pair<uint64_t, unsigned> >::iterator DFII = - DeferredFunctionInfo.find(F); +bool BitcodeReader::Materialize(GlobalValue *GV, std::string *ErrInfo) { + Function *F = dyn_cast<Function>(GV); + // If it's not a function or is already material, ignore the request. + if (!F || !F->isMaterializable()) return false; + + DenseMap<Function*, uint64_t>::iterator DFII = DeferredFunctionInfo.find(F); assert(DFII != DeferredFunctionInfo.end() && "Deferred function not found!"); - // Move the bit stream to the saved position of the deferred function body and - // restore the real linkage type for the function. - Stream.JumpToBit(DFII->second.first); - F->setLinkage((GlobalValue::LinkageTypes)DFII->second.second); + // Move the bit stream to the saved position of the deferred function body. + Stream.JumpToBit(DFII->second); if (ParseFunctionBody(F)) { if (ErrInfo) *ErrInfo = ErrorString; @@ -2336,27 +2343,36 @@ bool BitcodeReader::materializeFunction(Function *F, std::string *ErrInfo) { return false; } -void BitcodeReader::dematerializeFunction(Function *F) { - // If this function isn't materialized, or if it is a proto, this is a noop. - if (F->hasNotBeenReadFromBitcode() || F->isDeclaration()) +bool BitcodeReader::isDematerializable(const GlobalValue *GV) const { + const Function *F = dyn_cast<Function>(GV); + if (!F || F->isDeclaration()) + return false; + return DeferredFunctionInfo.count(const_cast<Function*>(F)); +} + +void BitcodeReader::Dematerialize(GlobalValue *GV) { + Function *F = dyn_cast<Function>(GV); + // If this function isn't dematerializable, this is a noop. 
+ if (!F || !isDematerializable(F)) return; assert(DeferredFunctionInfo.count(F) && "No info to read function later?"); // Just forget the function body, we can remat it later. F->deleteBody(); - F->setLinkage(GlobalValue::GhostLinkage); } -Module *BitcodeReader::materializeModule(std::string *ErrInfo) { +bool BitcodeReader::MaterializeModule(Module *M, std::string *ErrInfo) { + assert(M == TheModule && + "Can only Materialize the Module this BitcodeReader is attached to."); // Iterate over the module, deserializing any functions that are still on // disk. for (Module::iterator F = TheModule->begin(), E = TheModule->end(); F != E; ++F) - if (F->hasNotBeenReadFromBitcode() && - materializeFunction(F, ErrInfo)) - return 0; + if (F->isMaterializable() && + Materialize(F, ErrInfo)) + return true; // Upgrade any intrinsic calls that slipped through (should not happen!) and // delete the old functions to clean up. We can't do this unless the entire @@ -2380,19 +2396,7 @@ Module *BitcodeReader::materializeModule(std::string *ErrInfo) { // Check debug info intrinsics. CheckDebugInfoIntrinsics(TheModule); - return TheModule; -} - - -/// This method is provided by the parent ModuleProvde class and overriden -/// here. It simply releases the module from its provided and frees up our -/// state. -/// @brief Release our hold on the generated module -Module *BitcodeReader::releaseModule(std::string *ErrInfo) { - // Since we're losing control of this Module, we must hand it back complete - Module *M = ModuleProvider::releaseModule(ErrInfo); - FreeState(); - return M; + return false; } @@ -2400,45 +2404,41 @@ Module *BitcodeReader::releaseModule(std::string *ErrInfo) { // External interface //===----------------------------------------------------------------------===// -/// getBitcodeModuleProvider - lazy function-at-a-time loading from a file. +/// getLazyBitcodeModule - lazy function-at-a-time loading from a file. /// -ModuleProvider *llvm::getBitcodeModuleProvider(MemoryBuffer *Buffer, - LLVMContext& Context, - std::string *ErrMsg) { +Module *llvm::getLazyBitcodeModule(MemoryBuffer *Buffer, + LLVMContext& Context, + std::string *ErrMsg) { + Module *M = new Module(Buffer->getBufferIdentifier(), Context); BitcodeReader *R = new BitcodeReader(Buffer, Context); - if (R->ParseBitcode()) { + M->setMaterializer(R); + if (R->ParseBitcodeInto(M)) { if (ErrMsg) *ErrMsg = R->getErrorString(); - // Don't let the BitcodeReader dtor delete 'Buffer'. - R->releaseMemoryBuffer(); - delete R; + delete M; // Also deletes R. return 0; } - return R; + // Have the BitcodeReader dtor delete 'Buffer'. + R->setBufferOwned(true); + return M; } /// ParseBitcodeFile - Read the specified bitcode file, returning the module. /// If an error occurs, return null and fill in *ErrMsg if non-null. Module *llvm::ParseBitcodeFile(MemoryBuffer *Buffer, LLVMContext& Context, std::string *ErrMsg){ - BitcodeReader *R; - R = static_cast<BitcodeReader*>(getBitcodeModuleProvider(Buffer, Context, - ErrMsg)); - if (!R) return 0; - - // Read in the entire module. - Module *M = R->materializeModule(ErrMsg); + Module *M = getLazyBitcodeModule(Buffer, Context, ErrMsg); + if (!M) return 0; // Don't let the BitcodeReader dtor delete 'Buffer', regardless of whether // there was an error. - R->releaseMemoryBuffer(); + static_cast<BitcodeReader*>(M->getMaterializer())->setBufferOwned(false); - // If there was no error, tell ModuleProvider not to delete it when its dtor - // is run. 
- if (M) - M = R->releaseModule(ErrMsg); - - delete R; + // Read in the entire module, and destroy the BitcodeReader. + if (M->MaterializeAllPermanently(ErrMsg)) { + delete M; + return NULL; + } return M; } diff --git a/lib/Bitcode/Reader/BitcodeReader.h b/lib/Bitcode/Reader/BitcodeReader.h index bb3961a..55c71f7 100644 --- a/lib/Bitcode/Reader/BitcodeReader.h +++ b/lib/Bitcode/Reader/BitcodeReader.h @@ -14,7 +14,7 @@ #ifndef BITCODE_READER_H #define BITCODE_READER_H -#include "llvm/ModuleProvider.h" +#include "llvm/GVMaterializer.h" #include "llvm/Attributes.h" #include "llvm/Type.h" #include "llvm/OperandTraits.h" @@ -121,9 +121,11 @@ public: void AssignValue(Value *V, unsigned Idx); }; -class BitcodeReader : public ModuleProvider { +class BitcodeReader : public GVMaterializer { LLVMContext &Context; + Module *TheModule; MemoryBuffer *Buffer; + bool BufferOwned; BitstreamReader StreamFile; BitstreamCursor Stream; @@ -160,9 +162,9 @@ class BitcodeReader : public ModuleProvider { bool HasReversedFunctionsWithBodies; /// DeferredFunctionInfo - When function bodies are initially scanned, this - /// map contains info about where to find deferred function body (in the - /// stream) and what linkage the original function had. - DenseMap<Function*, std::pair<uint64_t, unsigned> > DeferredFunctionInfo; + /// map contains info about where to find deferred function body in the + /// stream. + DenseMap<Function*, uint64_t> DeferredFunctionInfo; /// BlockAddrFwdRefs - These are blockaddr references to basic blocks. These /// are resolved lazily when functions are loaded. @@ -171,7 +173,8 @@ class BitcodeReader : public ModuleProvider { public: explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C) - : Context(C), Buffer(buffer), ErrorString(0), ValueList(C), MDValueList(C) { + : Context(C), TheModule(0), Buffer(buffer), BufferOwned(false), + ErrorString(0), ValueList(C), MDValueList(C) { HasReversedFunctionsWithBodies = false; } ~BitcodeReader() { @@ -180,17 +183,15 @@ public: void FreeState(); - /// releaseMemoryBuffer - This causes the reader to completely forget about - /// the memory buffer it contains, which prevents the buffer from being - /// destroyed when it is deleted. - void releaseMemoryBuffer() { - Buffer = 0; - } + /// setBufferOwned - If this is true, the reader will destroy the MemoryBuffer + /// when the reader is destroyed. + void setBufferOwned(bool Owned) { BufferOwned = Owned; } - virtual bool materializeFunction(Function *F, std::string *ErrInfo = 0); - virtual Module *materializeModule(std::string *ErrInfo = 0); - virtual void dematerializeFunction(Function *F); - virtual Module *releaseModule(std::string *ErrInfo = 0); + virtual bool isMaterializable(const GlobalValue *GV) const; + virtual bool isDematerializable(const GlobalValue *GV) const; + virtual bool Materialize(GlobalValue *GV, std::string *ErrInfo = 0); + virtual bool MaterializeModule(Module *M, std::string *ErrInfo = 0); + virtual void Dematerialize(GlobalValue *GV); bool Error(const char *Str) { ErrorString = Str; @@ -200,7 +201,7 @@ public: /// @brief Main interface to parsing a bitcode buffer. /// @returns true if an error occurred. 
- bool ParseBitcode(); + bool ParseBitcodeInto(Module *M); private: const Type *getTypeByID(unsigned ID, bool isTypeTable = false); Value *getFnValueByID(unsigned ID, const Type *Ty) { @@ -248,7 +249,7 @@ private: } - bool ParseModule(const std::string &ModuleID); + bool ParseModule(); bool ParseAttributeBlock(); bool ParseTypeTable(); bool ParseTypeSymbolTable(); diff --git a/lib/Bitcode/Reader/Makefile b/lib/Bitcode/Reader/Makefile index 0aae3bf..59af8d53 100644 --- a/lib/Bitcode/Reader/Makefile +++ b/lib/Bitcode/Reader/Makefile @@ -10,7 +10,6 @@ LEVEL = ../../.. LIBRARYNAME = LLVMBitReader BUILD_ARCHIVE = 1 -CXXFLAGS = -fno-rtti include $(LEVEL)/Makefile.common diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 5a4a1b2..82e73b5 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -181,6 +181,14 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { Log2_32_Ceil(VE.getTypes().size()+1))); unsigned StructAbbrev = Stream.EmitAbbrev(Abbv); + // Abbrev for TYPE_CODE_UNION. + Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_UNION)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, + Log2_32_Ceil(VE.getTypes().size()+1))); + unsigned UnionAbbrev = Stream.EmitAbbrev(Abbv); + // Abbrev for TYPE_CODE_ARRAY. Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_ARRAY)); @@ -250,6 +258,17 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { AbbrevToUse = StructAbbrev; break; } + case Type::UnionTyID: { + const UnionType *UT = cast<UnionType>(T); + // UNION: [eltty x N] + Code = bitc::TYPE_CODE_UNION; + // Output all of the element types. + for (UnionType::element_iterator I = UT->element_begin(), + E = UT->element_end(); I != E; ++I) + TypeVals.push_back(VE.getTypeID(*I)); + AbbrevToUse = UnionAbbrev; + break; + } case Type::ArrayTyID: { const ArrayType *AT = cast<ArrayType>(T); // ARRAY: [numelts, eltty] @@ -280,7 +299,6 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { static unsigned getEncodedLinkage(const GlobalValue *GV) { switch (GV->getLinkage()) { default: llvm_unreachable("Invalid linkage!"); - case GlobalValue::GhostLinkage: // Map ghost linkage onto external. 
case GlobalValue::ExternalLinkage: return 0; case GlobalValue::WeakAnyLinkage: return 1; case GlobalValue::AppendingLinkage: return 2; @@ -499,7 +517,7 @@ static void WriteModuleMetadata(const ValueEnumerator &VE, for (unsigned i = 0, e = Vals.size(); i != e; ++i) { if (const MDNode *N = dyn_cast<MDNode>(Vals[i].first)) { - if (!N->isFunctionLocal()) { + if (!N->isFunctionLocal() || !N->getFunction()) { if (!StartedMetadataBlock) { Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3); StartedMetadataBlock = true; @@ -563,7 +581,7 @@ static void WriteFunctionLocalMetadata(const Function &F, for (unsigned i = 0, e = Vals.size(); i != e; ++i) if (const MDNode *N = dyn_cast<MDNode>(Vals[i].first)) - if (N->getFunction() == &F) { + if (N->isFunctionLocal() && N->getFunction() == &F) { if (!StartedMetadataBlock) { Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3); StartedMetadataBlock = true; @@ -790,7 +808,7 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal, else if (isCStr7) AbbrevToUse = CString7Abbrev; } else if (isa<ConstantArray>(C) || isa<ConstantStruct>(V) || - isa<ConstantVector>(V)) { + isa<ConstantUnion>(C) || isa<ConstantVector>(V)) { Code = bitc::CST_CODE_AGGREGATE; for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) Record.push_back(VE.getValueID(C->getOperand(i))); @@ -1511,16 +1529,50 @@ enum { DarwinBCHeaderSize = 5*4 }; +/// isARMTriplet - Return true if the triplet looks like: +/// arm-*, thumb-*, armv[0-9]-*, thumbv[0-9]-*, armv5te-*, or armv6t2-*. +static bool isARMTriplet(const std::string &TT) { + size_t Pos = 0; + size_t Size = TT.size(); + if (Size >= 6 && + TT[0] == 't' && TT[1] == 'h' && TT[2] == 'u' && + TT[3] == 'm' && TT[4] == 'b') + Pos = 5; + else if (Size >= 4 && TT[0] == 'a' && TT[1] == 'r' && TT[2] == 'm') + Pos = 3; + else + return false; + + if (TT[Pos] == '-') + return true; + else if (TT[Pos] == 'v') { + if (Size >= Pos+4 && + TT[Pos+1] == '6' && TT[Pos+2] == 't' && TT[Pos+3] == '2') + return true; + else if (Size >= Pos+4 && + TT[Pos+1] == '5' && TT[Pos+2] == 't' && TT[Pos+3] == 'e') + return true; + } else + return false; + while (++Pos < Size && TT[Pos] != '-') { + if (!isdigit(TT[Pos])) + return false; + } + return true; +} + static void EmitDarwinBCHeader(BitstreamWriter &Stream, const std::string &TT) { unsigned CPUType = ~0U; - // Match x86_64-*, i[3-9]86-*, powerpc-*, powerpc64-*. The CPUType is a - // magic number from /usr/include/mach/machine.h. It is ok to reproduce the + // Match x86_64-*, i[3-9]86-*, powerpc-*, powerpc64-*, arm-*, thumb-*, + // armv[0-9]-*, thumbv[0-9]-*, armv5te-*, or armv6t2-*. The CPUType is a magic + // number from /usr/include/mach/machine.h. It is ok to reproduce the // specific constants here because they are implicitly part of the Darwin ABI. enum { DARWIN_CPU_ARCH_ABI64 = 0x01000000, DARWIN_CPU_TYPE_X86 = 7, + DARWIN_CPU_TYPE_ARM = 12, DARWIN_CPU_TYPE_POWERPC = 18 }; @@ -1533,6 +1585,8 @@ static void EmitDarwinBCHeader(BitstreamWriter &Stream, CPUType = DARWIN_CPU_TYPE_POWERPC; else if (TT.find("powerpc64-") == 0) CPUType = DARWIN_CPU_TYPE_POWERPC | DARWIN_CPU_ARCH_ABI64; + else if (isARMTriplet(TT)) + CPUType = DARWIN_CPU_TYPE_ARM; // Traditional Bitcode starts after header. unsigned BCOffset = DarwinBCHeaderSize; diff --git a/lib/Bitcode/Writer/Makefile b/lib/Bitcode/Writer/Makefile index 5f9742e..7b0bd72 100644 --- a/lib/Bitcode/Writer/Makefile +++ b/lib/Bitcode/Writer/Makefile @@ -10,7 +10,6 @@ LEVEL = ../../.. 
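Stepping back to the isARMTriplet matcher above, a few hand-worked inputs show what it accepts (illustrative, not exhaustive):

    // "arm-apple-darwin10"     -> true   ('-' directly after "arm")
    // "thumb-apple-darwin10"   -> true   ('-' directly after "thumb")
    // "armv6-apple-darwin10"   -> true   (only digits between 'v' and '-')
    // "armv5te-apple-darwin10" -> true   (explicit v5te special case)
    // "thumbv6t2-elf"          -> true   (explicit v6t2 special case)
    // "armv7a-eabi"            -> false  ('a' after the digits is not a digit)
    // "arm64-apple-darwin"     -> false  ('6' after "arm" is neither '-' nor 'v')

Any triple that matches selects DARWIN_CPU_TYPE_ARM for the Darwin bitcode wrapper header.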
LIBRARYNAME = LLVMBitWriter BUILD_ARCHIVE = 1 -CXXFLAGS = -fno-rtti include $(LEVEL)/Makefile.common diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp index c46d735..595497f 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -93,7 +93,7 @@ ValueEnumerator::ValueEnumerator(const Module *M) { for (User::const_op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) { if (MDNode *MD = dyn_cast<MDNode>(*OI)) - if (MD->isFunctionLocal()) + if (MD->isFunctionLocal() && MD->getFunction()) // These will get enumerated during function-incorporation. continue; EnumerateOperandType(*OI); @@ -408,21 +408,25 @@ void ValueEnumerator::incorporateFunction(const Function &F) { FirstInstID = Values.size(); + SmallVector<MDNode *, 8> FunctionLocalMDs; // Add all of the instructions. for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) { for (User::const_op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) { if (MDNode *MD = dyn_cast<MDNode>(*OI)) - if (!MD->isFunctionLocal()) - // These were already enumerated during ValueEnumerator creation. - continue; - EnumerateOperandType(*OI); + if (MD->isFunctionLocal() && MD->getFunction()) + // Enumerate metadata after the instructions they might refer to. + FunctionLocalMDs.push_back(MD); } if (!I->getType()->isVoidTy()) EnumerateValue(I); } } + + // Add all of the function-local metadata. + for (unsigned i = 0, e = FunctionLocalMDs.size(); i != e; ++i) + EnumerateOperandType(FunctionLocalMDs[i]); } void ValueEnumerator::purgeFunction() { diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index ca1f4a3..8840622f 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -425,8 +425,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, unsigned Reg = MO.getReg(); if (Reg == 0) continue; // Ignore KILLs and passthru registers for liveness... - if ((MI->getOpcode() == TargetInstrInfo::KILL) || - (PassthruRegs.count(Reg) != 0)) + if (MI->isKill() || (PassthruRegs.count(Reg) != 0)) continue; // Update def for Reg and aliases. @@ -481,7 +480,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI, // Form a group of all defs and uses of a KILL instruction to ensure // that all registers are renamed as a group. - if (MI->getOpcode() == TargetInstrInfo::KILL) { + if (MI->isKill()) { DEBUG(dbgs() << "\tKill Group:"); unsigned FirstReg = 0; @@ -792,7 +791,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( // Ignore KILL instructions (they form a group in ScanInstruction // but don't cause any anti-dependence breaking themselves) - if (MI->getOpcode() != TargetInstrInfo::KILL) { + if (!MI->isKill()) { // Attempt to break each anti-dependency... 
for (unsigned i = 0, e = Edges.size(); i != e; ++i) { SDep *Edge = Edges[i]; diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index f4d8864..fc08384 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "asm-printer" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/Assembly/Writer.h" #include "llvm/DerivedTypes.h" @@ -24,6 +25,7 @@ #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -31,10 +33,6 @@ #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/FormattedStream.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetData.h" @@ -45,37 +43,27 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/FormattedStream.h" #include <cerrno> using namespace llvm; -static cl::opt<cl::boolOrDefault> -AsmVerbose("asm-verbose", cl::desc("Add comments to directives."), - cl::init(cl::BOU_UNSET)); - -static bool getVerboseAsm(bool VDef) { - switch (AsmVerbose) { - default: - case cl::BOU_UNSET: return VDef; - case cl::BOU_TRUE: return true; - case cl::BOU_FALSE: return false; - } -} +STATISTIC(EmittedInsts, "Number of machine instrs printed"); char AsmPrinter::ID = 0; AsmPrinter::AsmPrinter(formatted_raw_ostream &o, TargetMachine &tm, - const MCAsmInfo *T, bool VDef) - : MachineFunctionPass(&ID), FunctionNumber(0), O(o), + MCContext &Ctx, MCStreamer &Streamer, + const MCAsmInfo *T) + : MachineFunctionPass(&ID), O(o), TM(tm), MAI(T), TRI(tm.getRegisterInfo()), - - OutContext(*new MCContext()), - // FIXME: Pass instprinter to streamer. - OutStreamer(*createAsmStreamer(OutContext, O, *T, - TM.getTargetData()->isLittleEndian(), - getVerboseAsm(VDef), 0)), - + OutContext(Ctx), OutStreamer(Streamer), LastMI(0), LastFn(0), Counter(~0U), PrevDLT(NULL) { DW = 0; MMI = 0; - VerboseAsm = getVerboseAsm(VDef); + VerboseAsm = Streamer.isVerboseAsm(); } AsmPrinter::~AsmPrinter() { @@ -87,6 +75,12 @@ AsmPrinter::~AsmPrinter() { delete &OutContext; } +/// getFunctionNumber - Return a unique ID for the current function. +/// +unsigned AsmPrinter::getFunctionNumber() const { + return MF->getFunctionNumber(); +} + TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const { return TM.getTargetLowering()->getObjFileLowering(); } @@ -115,11 +109,11 @@ bool AsmPrinter::doInitialization(Module &M) { // Allow the target to emit any magic that it wants at the start of the file. EmitStartOfAsmFile(M); + // Very minimal debug info. It is ignored if we emit actual debug info. If we + // don't, this at least helps the user find where a global came from. if (MAI->hasSingleParameterDotFile()) { - // Very minimal debug info. It is ignored if we emit actual - // debug info. 
If we don't, this at least helps the user find where - // a function came from. - O << "\t.file\t\"" << M.getModuleIdentifier() << "\"\n"; + // .file "foo.c" + OutStreamer.EmitFileDirective(M.getModuleIdentifier()); } GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>(); @@ -144,6 +138,52 @@ bool AsmPrinter::doInitialization(Module &M) { return false; } +void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const { + switch ((GlobalValue::LinkageTypes)Linkage) { + case GlobalValue::CommonLinkage: + case GlobalValue::LinkOnceAnyLinkage: + case GlobalValue::LinkOnceODRLinkage: + case GlobalValue::WeakAnyLinkage: + case GlobalValue::WeakODRLinkage: + case GlobalValue::LinkerPrivateLinkage: + if (MAI->getWeakDefDirective() != 0) { + // .globl _foo + OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); + // .weak_definition _foo + OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition); + } else if (const char *LinkOnce = MAI->getLinkOnceDirective()) { + // .globl _foo + OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); + // FIXME: linkonce should be a section attribute, handled by COFF Section + // assignment. + // http://sourceware.org/binutils/docs-2.20/as/Linkonce.html#Linkonce + // .linkonce discard + // FIXME: It would be nice to use .linkonce samesize for non-common + // globals. + O << LinkOnce; + } else { + // .weak _foo + OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Weak); + } + break; + case GlobalValue::DLLExportLinkage: + case GlobalValue::AppendingLinkage: + // FIXME: appending linkage variables should go into a section of + // their name or something. For now, just emit them as external. + case GlobalValue::ExternalLinkage: + // If external or appending, declare as a global symbol. + // .globl _foo + OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); + break; + case GlobalValue::PrivateLinkage: + case GlobalValue::InternalLinkage: + break; + default: + llvm_unreachable("Unknown linkage type!"); + } +} + + /// EmitGlobalVariable - Emit the specified global variable to the .s file. void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { if (!GV->hasInitializer()) // External globals require no code. @@ -154,15 +194,10 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { return; MCSymbol *GVSym = GetGlobalValueSymbol(GV); - printVisibility(GVSym, GV->getVisibility()); + EmitVisibility(GVSym, GV->getVisibility()); - if (MAI->hasDotTypeDotSizeDirective()) { - O << "\t.type\t" << *GVSym; - if (MAI->getCommentString()[0] != '@') - O << ",@object\n"; - else - O << ",%object\n"; - } + if (MAI->hasDotTypeDotSizeDirective()) + OutStreamer.EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject); SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM); @@ -224,47 +259,9 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { OutStreamer.SwitchSection(TheSection); - // TODO: Factor into an 'emit linkage' thing that is shared with function - // bodies. 
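The TODO deleted just above asked for exactly the factoring this patch performs: global and function emission now share one helper. Condensed from the surrounding hunks (not compilable on its own):

    // In EmitGlobalVariable(), after switching to the global's section:
    EmitLinkage(GV->getLinkage(), GVSym);

    // In the new EmitFunctionHeader(), after switching to the function's
    // section:
    EmitLinkage(F->getLinkage(), CurrentFnSym);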
- switch (GV->getLinkage()) { - case GlobalValue::CommonLinkage: - case GlobalValue::LinkOnceAnyLinkage: - case GlobalValue::LinkOnceODRLinkage: - case GlobalValue::WeakAnyLinkage: - case GlobalValue::WeakODRLinkage: - case GlobalValue::LinkerPrivateLinkage: - if (MAI->getWeakDefDirective() != 0) { - // .globl _foo - OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); - // .weak_definition _foo - OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition); - } else if (const char *LinkOnce = MAI->getLinkOnceDirective()) { - // .globl _foo - OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); - // .linkonce same_size - O << LinkOnce; - } else { - // .weak _foo - OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Weak); - } - break; - case GlobalValue::DLLExportLinkage: - case GlobalValue::AppendingLinkage: - // FIXME: appending linkage variables should go into a section of - // their name or something. For now, just emit them as external. - case GlobalValue::ExternalLinkage: - // If external or appending, declare as a global symbol. - // .globl _foo - OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); - break; - case GlobalValue::PrivateLinkage: - case GlobalValue::InternalLinkage: - break; - default: - llvm_unreachable("Unknown linkage type!"); - } - + EmitLinkage(GV->getLinkage(), GVSym); EmitAlignment(AlignLog, GV); + if (VerboseAsm) { WriteAsOperand(OutStreamer.GetCommentOS(), GV, /*PrintType=*/false, GV->getParent()); @@ -275,7 +272,185 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { EmitGlobalConstant(GV->getInitializer()); if (MAI->hasDotTypeDotSizeDirective()) - O << "\t.size\t" << *GVSym << ", " << Size << '\n'; + // .size foo, 42 + OutStreamer.EmitELFSize(GVSym, MCConstantExpr::Create(Size, OutContext)); + + OutStreamer.AddBlankLine(); +} + +/// EmitFunctionHeader - This method emits the header for the current +/// function. +void AsmPrinter::EmitFunctionHeader() { + // Print out constants referenced by the function + EmitConstantPool(); + + // Print the 'header' of function. + const Function *F = MF->getFunction(); + + OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); + EmitVisibility(CurrentFnSym, F->getVisibility()); + + EmitLinkage(F->getLinkage(), CurrentFnSym); + EmitAlignment(MF->getAlignment(), F); + + if (MAI->hasDotTypeDotSizeDirective()) + OutStreamer.EmitSymbolAttribute(CurrentFnSym, MCSA_ELF_TypeFunction); + + if (VerboseAsm) { + WriteAsOperand(OutStreamer.GetCommentOS(), F, + /*PrintType=*/false, F->getParent()); + OutStreamer.GetCommentOS() << '\n'; + } + + // Emit the CurrentFnSym. This is a virtual function to allow targets to + // do their wild and crazy things as required. + EmitFunctionEntryLabel(); + + // Add some workaround for linkonce linkage on Cygwin\MinGW. + if (MAI->getLinkOnceDirective() != 0 && + (F->hasLinkOnceLinkage() || F->hasWeakLinkage())) + // FIXME: What is this? + O << "Lllvm$workaround$fake$stub$" << *CurrentFnSym << ":\n"; + + // Emit pre-function debug and/or EH information. + if (MAI->doesSupportDebugInformation() || MAI->doesSupportExceptionHandling()) + DW->BeginFunction(MF); +} + +/// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the +/// function. This can be overridden by targets as required to do custom stuff. +void AsmPrinter::EmitFunctionEntryLabel() { + OutStreamer.EmitLabel(CurrentFnSym); +} + + +/// EmitComments - Pretty-print comments for instructions. 
+static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) { + const MachineFunction *MF = MI.getParent()->getParent(); + const TargetMachine &TM = MF->getTarget(); + + if (!MI.getDebugLoc().isUnknown()) { + DILocation DLT = MF->getDILocation(MI.getDebugLoc()); + + // Print source line info. + DIScope Scope = DLT.getScope(); + // Omit the directory, because it's likely to be long and uninteresting. + if (!Scope.isNull()) + CommentOS << Scope.getFilename(); + else + CommentOS << "<unknown>"; + CommentOS << ':' << DLT.getLineNumber(); + if (DLT.getColumnNumber() != 0) + CommentOS << ':' << DLT.getColumnNumber(); + CommentOS << '\n'; + } + + // Check for spills and reloads + int FI; + + const MachineFrameInfo *FrameInfo = MF->getFrameInfo(); + + // We assume a single instruction only has a spill or reload, not + // both. + const MachineMemOperand *MMO; + if (TM.getInstrInfo()->isLoadFromStackSlotPostFE(&MI, FI)) { + if (FrameInfo->isSpillSlotObjectIndex(FI)) { + MMO = *MI.memoperands_begin(); + CommentOS << MMO->getSize() << "-byte Reload\n"; + } + } else if (TM.getInstrInfo()->hasLoadFromStackSlot(&MI, MMO, FI)) { + if (FrameInfo->isSpillSlotObjectIndex(FI)) + CommentOS << MMO->getSize() << "-byte Folded Reload\n"; + } else if (TM.getInstrInfo()->isStoreToStackSlotPostFE(&MI, FI)) { + if (FrameInfo->isSpillSlotObjectIndex(FI)) { + MMO = *MI.memoperands_begin(); + CommentOS << MMO->getSize() << "-byte Spill\n"; + } + } else if (TM.getInstrInfo()->hasStoreToStackSlot(&MI, MMO, FI)) { + if (FrameInfo->isSpillSlotObjectIndex(FI)) + CommentOS << MMO->getSize() << "-byte Folded Spill\n"; + } + + // Check for spill-induced copies + unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; + if (TM.getInstrInfo()->isMoveInstr(MI, SrcReg, DstReg, + SrcSubIdx, DstSubIdx)) { + if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse)) + CommentOS << " Reload Reuse\n"; + } +} + + + +/// EmitFunctionBody - This method emits the body and trailer for a +/// function. +void AsmPrinter::EmitFunctionBody() { + // Emit target-specific gunk before the function body. + EmitFunctionBodyStart(); + + // Print out code for the function. + bool HasAnyRealCode = false; + for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); + I != E; ++I) { + // Print a label for the basic block. + EmitBasicBlockStart(I); + for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); + II != IE; ++II) { + // Print the assembly for the instruction. + if (!II->isLabel()) + HasAnyRealCode = true; + + ++EmittedInsts; + + // FIXME: Clean up processDebugLoc. + processDebugLoc(II, true); + + if (VerboseAsm) + EmitComments(*II, OutStreamer.GetCommentOS()); + + switch (II->getOpcode()) { + case TargetOpcode::DBG_LABEL: + case TargetOpcode::EH_LABEL: + case TargetOpcode::GC_LABEL: + printLabelInst(II); + break; + case TargetOpcode::INLINEASM: + printInlineAsm(II); + break; + case TargetOpcode::IMPLICIT_DEF: + printImplicitDef(II); + break; + case TargetOpcode::KILL: + printKill(II); + break; + default: + EmitInstruction(II); + break; + } + + // FIXME: Clean up processDebugLoc. + processDebugLoc(II, false); + } + } + + // If the function is empty and the object file uses .subsections_via_symbols, + // then we need to emit *something* to the function body to prevent the + // labels from collapsing together. Just emit a 0 byte. + if (MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode) + OutStreamer.EmitIntValue(0, 1, 0/*addrspace*/); + + // Emit target-specific gunk after the function body. 
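The loop above fixes the division of labor: the AsmPrinter base class walks blocks and instructions and handles the target-independent pseudo-instructions itself, while a target printer fills in the bracketing hooks and per-instruction emission. A sketch of the target side (FooAsmPrinter is hypothetical; only the hook names and the dispatch behavior come from the code above):

    class FooAsmPrinter : public AsmPrinter {
    public:
      // Bracket the instruction loop in EmitFunctionBody().
      virtual void EmitFunctionBodyStart(); // e.g. prologue directives
      virtual void EmitFunctionBodyEnd();   // e.g. literal pools
      // Receives every instruction not handled generically above
      // (DBG/EH/GC labels, INLINEASM, IMPLICIT_DEF, KILL).
      virtual void EmitInstruction(const MachineInstr *MI);
    };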
+ EmitFunctionBodyEnd(); + + if (MAI->hasDotTypeDotSizeDirective()) + O << "\t.size\t" << *CurrentFnSym << ", .-" << *CurrentFnSym << '\n'; + + // Emit post-function debug information. + if (MAI->doesSupportDebugInformation() || MAI->doesSupportExceptionHandling()) + DW->EndFunction(MF); + + // Print out jump tables referenced by the function. + EmitJumpTableInfo(); OutStreamer.AddBlankLine(); } @@ -313,7 +488,7 @@ bool AsmPrinter::doFinalization(Module &M) { } } - if (MAI->getSetDirective()) { + if (MAI->hasSetDirective()) { OutStreamer.AddBlankLine(); for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end(); I != E; ++I) { @@ -329,9 +504,11 @@ bool AsmPrinter::doFinalization(Module &M) { else assert(I->hasLocalLinkage() && "Invalid alias linkage"); - printVisibility(Name, I->getVisibility()); + EmitVisibility(Name, I->getVisibility()); - O << MAI->getSetDirective() << ' ' << *Name << ", " << *Target << '\n'; + // Emit the directives as assignments aka .set: + OutStreamer.EmitAssignment(Name, + MCSymbolRefExpr::Create(Target, OutContext)); } } @@ -360,9 +537,9 @@ bool AsmPrinter::doFinalization(Module &M) { } void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { + this->MF = &MF; // Get the function symbol. CurrentFnSym = GetGlobalValueSymbol(MF.getFunction()); - IncrementFunctionNumber(); if (VerboseAsm) LI = &getAnalysis<MachineLoopInfo>(); @@ -383,7 +560,8 @@ namespace { /// used to print out constants which have been "spilled to memory" by /// the code generator. /// -void AsmPrinter::EmitConstantPool(MachineConstantPool *MCP) { +void AsmPrinter::EmitConstantPool() { + const MachineConstantPool *MCP = MF->getConstantPool(); const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants(); if (CP.empty()) return; @@ -470,27 +648,26 @@ void AsmPrinter::EmitConstantPool(MachineConstantPool *MCP) { /// EmitJumpTableInfo - Print assembly representations of the jump tables used /// by the current function to the current output stream. /// -void AsmPrinter::EmitJumpTableInfo(MachineJumpTableInfo *MJTI, - MachineFunction &MF) { +void AsmPrinter::EmitJumpTableInfo() { + const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); + if (MJTI == 0) return; const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); if (JT.empty()) return; - bool IsPic = TM.getRelocationModel() == Reloc::PIC_; - // Pick the directive to use to print the jump table entries, and switch to // the appropriate section. - TargetLowering *LoweringInfo = TM.getTargetLowering(); - - const Function *F = MF.getFunction(); + const Function *F = MF->getFunction(); bool JTInDiffSection = false; - if (F->isWeakForLinker() || - (IsPic && !LoweringInfo->usesGlobalOffsetTable())) { - // In PIC mode, we need to emit the jump table to the same section as the - // function body itself, otherwise the label differences won't make sense. - // We should also do if the section name is NULL or function is declared in - // discardable section. - OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, - TM)); + if (// In PIC mode, we need to emit the jump table to the same section as the + // function body itself, otherwise the label differences won't make sense. + // FIXME: Need a better predicate for this: what about custom entries? 
+          MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 ||
+      // We should also do this if the section name is NULL or the function is
+      // declared in a discardable section.
+      // FIXME: this isn't the right predicate, should be based on the MCSection
+      // for the function.
+      F->isWeakForLinker()) {
+    OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F,Mang,TM));
   } else {
     // Otherwise, drop it in the readonly section.
     const MCSection *ReadOnlySection =
@@ -498,73 +675,106 @@ void AsmPrinter::EmitJumpTableInfo(MachineJumpTableInfo *MJTI,
     OutStreamer.SwitchSection(ReadOnlySection);
     JTInDiffSection = true;
   }
+
+  EmitAlignment(Log2_32(MJTI->getEntryAlignment(*TM.getTargetData())));
-  EmitAlignment(Log2_32(MJTI->getAlignment()));
-
-  for (unsigned i = 0, e = JT.size(); i != e; ++i) {
-    const std::vector<MachineBasicBlock*> &JTBBs = JT[i].MBBs;
+  for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) {
+    const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;

     // If this jump table was deleted, ignore it.
     if (JTBBs.empty()) continue;

-    // For PIC codegen, if possible we want to use the SetDirective to reduce
-    // the number of relocations the assembler will generate for the jump table.
-    // Set directives are all printed before the jump table itself.
-    SmallPtrSet<MachineBasicBlock*, 16> EmittedSets;
-    if (MAI->getSetDirective() && IsPic)
-      for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii)
-        if (EmittedSets.insert(JTBBs[ii]))
-          printPICJumpTableSetLabel(i, JTBBs[ii]);
+    // For the EK_LabelDifference32 entry, if the target supports .set, emit a
+    // .set directive for each unique entry. This reduces the number of
+    // relocations the assembler will generate for the jump table.
+    if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 &&
+        MAI->hasSetDirective()) {
+      SmallPtrSet<const MachineBasicBlock*, 16> EmittedSets;
+      const TargetLowering *TLI = TM.getTargetLowering();
+      const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF,JTI,OutContext);
+      for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) {
+        const MachineBasicBlock *MBB = JTBBs[ii];
+        if (!EmittedSets.insert(MBB)) continue;
+
+        // .set LJTSet, LBB32-base
+        const MCExpr *LHS =
+          MCSymbolRefExpr::Create(MBB->getSymbol(OutContext), OutContext);
+        OutStreamer.EmitAssignment(GetJTSetSymbol(JTI, MBB->getNumber()),
+                                MCBinaryExpr::CreateSub(LHS, Base, OutContext));
+      }
+    }

     // On some targets (e.g. Darwin) we want to emit two consecutive labels
     // before each jump table. The first label is never referenced, but tells
     // the assembler and linker the extents of the jump table object. The
     // second label is actually referenced by the code.
     if (JTInDiffSection && MAI->getLinkerPrivateGlobalPrefix()[0])
-      OutStreamer.EmitLabel(GetJTISymbol(i, true));
+      // FIXME: This doesn't have to have any specific name, just any randomly
+      // named and numbered 'l' label would work. Simplify GetJTISymbol.
+ OutStreamer.EmitLabel(GetJTISymbol(JTI, true)); - OutStreamer.EmitLabel(GetJTISymbol(i)); + OutStreamer.EmitLabel(GetJTISymbol(JTI)); - for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) { - printPICJumpTableEntry(MJTI, JTBBs[ii], i); - O << '\n'; - } + for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) + EmitJumpTableEntry(MJTI, JTBBs[ii], JTI); } } -void AsmPrinter::printPICJumpTableEntry(const MachineJumpTableInfo *MJTI, - const MachineBasicBlock *MBB, - unsigned uid) const { - bool isPIC = TM.getRelocationModel() == Reloc::PIC_; - - // Use JumpTableDirective otherwise honor the entry size from the jump table - // info. - const char *JTEntryDirective = MAI->getJumpTableDirective(isPIC); - bool HadJTEntryDirective = JTEntryDirective != NULL; - if (!HadJTEntryDirective) { - JTEntryDirective = MJTI->getEntrySize() == 4 ? - MAI->getData32bitsDirective() : MAI->getData64bitsDirective(); +/// EmitJumpTableEntry - Emit a jump table entry for the specified MBB to the +/// current stream. +void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, + const MachineBasicBlock *MBB, + unsigned UID) const { + const MCExpr *Value = 0; + switch (MJTI->getEntryKind()) { + case MachineJumpTableInfo::EK_Custom32: + Value = TM.getTargetLowering()->LowerCustomJumpTableEntry(MJTI, MBB, UID, + OutContext); + break; + case MachineJumpTableInfo::EK_BlockAddress: + // EK_BlockAddress - Each entry is a plain address of block, e.g.: + // .word LBB123 + Value = MCSymbolRefExpr::Create(MBB->getSymbol(OutContext), OutContext); + break; + case MachineJumpTableInfo::EK_GPRel32BlockAddress: { + // EK_GPRel32BlockAddress - Each entry is an address of block, encoded + // with a relocation as gp-relative, e.g.: + // .gprel32 LBB123 + MCSymbol *MBBSym = MBB->getSymbol(OutContext); + OutStreamer.EmitGPRel32Value(MCSymbolRefExpr::Create(MBBSym, OutContext)); + return; } - O << JTEntryDirective << ' '; - - // If we have emitted set directives for the jump table entries, print - // them rather than the entries themselves. If we're emitting PIC, then - // emit the table entries as differences between two text section labels. - // If we're emitting non-PIC code, then emit the entries as direct - // references to the target basic blocks. - if (!isPIC) { - O << *GetMBBSymbol(MBB->getNumber()); - } else if (MAI->getSetDirective()) { - O << MAI->getPrivateGlobalPrefix() << getFunctionNumber() - << '_' << uid << "_set_" << MBB->getNumber(); - } else { - O << *GetMBBSymbol(MBB->getNumber()); - // If the arch uses custom Jump Table directives, don't calc relative to - // JT. - if (!HadJTEntryDirective) - O << '-' << *GetJTISymbol(uid); + case MachineJumpTableInfo::EK_LabelDifference32: { + // EK_LabelDifference32 - Each entry is the address of the block minus + // the address of the jump table. This is used for PIC jump tables where + // gprel32 is not supported. e.g.: + // .word LBB123 - LJTI1_2 + // If the .set directive is supported, this is emitted as: + // .set L4_5_set_123, LBB123 - LJTI1_2 + // .word L4_5_set_123 + + // If we have emitted set directives for the jump table entries, print + // them rather than the entries themselves. If we're emitting PIC, then + // emit the table entries as differences between two text section labels. + if (MAI->hasSetDirective()) { + // If we used .set, reference the .set's symbol. + Value = MCSymbolRefExpr::Create(GetJTSetSymbol(UID, MBB->getNumber()), + OutContext); + break; + } + // Otherwise, use the difference as the jump table entry. 
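The EK_Custom32 case at the top of this switch defers entirely to the target, which builds the MCExpr itself. A hypothetical override (the signature is inferred from the call site above; the body is invented for illustration):

    const MCExpr *FooTargetLowering::
    LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
                              const MachineBasicBlock *MBB,
                              unsigned UID, MCContext &Ctx) const {
      // Simplest possible custom entry: a bare reference to the block's
      // symbol, equivalent to what EK_BlockAddress produces.
      return MCSymbolRefExpr::Create(MBB->getSymbol(Ctx), Ctx);
    }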
+ Value = MCSymbolRefExpr::Create(MBB->getSymbol(OutContext), OutContext); + const MCExpr *JTI = MCSymbolRefExpr::Create(GetJTISymbol(UID), OutContext); + Value = MCBinaryExpr::CreateSub(Value, JTI, OutContext); + break; + } } + + assert(Value && "Unknown entry kind!"); + + unsigned EntrySize = MJTI->getEntrySize(*TM.getTargetData()); + OutStreamer.EmitValue(Value, EntrySize, /*addrspace*/0); } @@ -683,48 +893,6 @@ void AsmPrinter::EmitInt64(uint64_t Value) const { OutStreamer.EmitIntValue(Value, 8, 0/*addrspace*/); } - -/// toOctal - Convert the low order bits of X into an octal digit. -/// -static inline char toOctal(int X) { - return (X&7)+'0'; -} - -/// printStringChar - Print a char, escaped if necessary. -/// -static void printStringChar(formatted_raw_ostream &O, unsigned char C) { - if (C == '"') { - O << "\\\""; - } else if (C == '\\') { - O << "\\\\"; - } else if (isprint((unsigned char)C)) { - O << C; - } else { - switch(C) { - case '\b': O << "\\b"; break; - case '\f': O << "\\f"; break; - case '\n': O << "\\n"; break; - case '\r': O << "\\r"; break; - case '\t': O << "\\t"; break; - default: - O << '\\'; - O << toOctal(C >> 6); - O << toOctal(C >> 3); - O << toOctal(C >> 0); - break; - } - } -} - -/// EmitFile - Emit a .file directive. -void AsmPrinter::EmitFile(unsigned Number, StringRef Name) const { - O << "\t.file\t" << Number << " \""; - for (unsigned i = 0, N = Name.size(); i < N; ++i) - printStringChar(O, Name[i]); - O << '\"'; -} - - //===----------------------------------------------------------------------===// // EmitAlignment - Emit an alignment directive to the specified power of @@ -779,15 +947,18 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { } switch (CE->getOpcode()) { - case Instruction::ZExt: - case Instruction::SExt: - case Instruction::FPTrunc: - case Instruction::FPExt: - case Instruction::UIToFP: - case Instruction::SIToFP: - case Instruction::FPToUI: - case Instruction::FPToSI: - default: llvm_unreachable("FIXME: Don't support this constant cast expr"); + default: + // If the code isn't optimized, there may be outstanding folding + // opportunities. Attempt to fold the expression using TargetData as a + // last resort before giving up. + if (Constant *C = + ConstantFoldConstantExpression(CE, AP.TM.getTargetData())) + if (C != CE) + return LowerConstant(C, AP); +#ifndef NDEBUG + CE->dump(); +#endif + llvm_unreachable("FIXME: Don't support this constant expr"); case Instruction::GetElementPtr: { const TargetData &TD = *AP.TM.getTargetData(); // Generate a symbolic expression for the byte address @@ -851,8 +1022,14 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx); } + // The MC library also has a right-shift operator, but it isn't consistently + // signed or unsigned between different targets. 
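Before the binary-operator cases pick up below, note what the rewritten default case above buys: instead of aborting on a cast it cannot express, LowerConstant first retries ConstantFoldConstantExpression() with TargetData. A hand-worked example of what that rescues (assuming TargetData allocates 4 bytes for i32):

    // There is no MCExpr for a zero-extension, so this constant used to hit
    // the llvm_unreachable path:
    //   i64 zext (i32 ptrtoint (i32* getelementptr (i32* null, i32 1) to i32)
    //             to i64)
    // With TargetData, the inner "sizeof(i32)" idiom folds to i32 4, the
    // zext folds to i64 4, and LowerConstant re-runs on a plain ConstantInt.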
case Instruction::Add: case Instruction::Sub: + case Instruction::Mul: + case Instruction::SDiv: + case Instruction::SRem: + case Instruction::Shl: case Instruction::And: case Instruction::Or: case Instruction::Xor: { @@ -862,6 +1039,10 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { default: llvm_unreachable("Unknown binary operator constant cast expr"); case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx); case Instruction::Sub: return MCBinaryExpr::CreateSub(LHS, RHS, Ctx); + case Instruction::Mul: return MCBinaryExpr::CreateMul(LHS, RHS, Ctx); + case Instruction::SDiv: return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx); + case Instruction::SRem: return MCBinaryExpr::CreateMod(LHS, RHS, Ctx); + case Instruction::Shl: return MCBinaryExpr::CreateShl(LHS, RHS, Ctx); case Instruction::And: return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx); case Instruction::Or: return MCBinaryExpr::CreateOr (LHS, RHS, Ctx); case Instruction::Xor: return MCBinaryExpr::CreateXor(LHS, RHS, Ctx); @@ -1012,6 +1193,7 @@ static void EmitGlobalConstantLargeInt(const ConstantInt *CI, void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) { if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV)) { uint64_t Size = TM.getTargetData()->getTypeAllocSize(CV->getType()); + if (Size == 0) Size = 1; // An empty "_foo:" followed by a section is undef. return OutStreamer.EmitZeros(Size, AddrSpace); } @@ -1295,7 +1477,7 @@ void AsmPrinter::printInlineAsm(const MachineInstr *MI) const { ++OpNo; // Skip over the ID number. if (Modifier[0] == 'l') // labels are target independent - O << *GetMBBSymbol(MI->getOperand(OpNo).getMBB()->getNumber()); + O << *MI->getOperand(OpNo).getMBB()->getSymbol(OutContext); else { AsmPrinter *AP = const_cast<AsmPrinter*>(this); if ((OpFlags & 7) == 4) { @@ -1320,6 +1502,7 @@ void AsmPrinter::printInlineAsm(const MachineInstr *MI) const { } } O << "\n\t" << MAI->getCommentString() << MAI->getInlineAsmEnd(); + OutStreamer.AddBlankLine(); } /// printImplicitDef - This method prints the specified machine instruction @@ -1329,6 +1512,7 @@ void AsmPrinter::printImplicitDef(const MachineInstr *MI) const { O.PadToColumn(MAI->getCommentColumn()); O << MAI->getCommentString() << " implicit-def: " << TRI->getName(MI->getOperand(0).getReg()); + OutStreamer.AddBlankLine(); } void AsmPrinter::printKill(const MachineInstr *MI) const { @@ -1340,12 +1524,14 @@ void AsmPrinter::printKill(const MachineInstr *MI) const { assert(op.isReg() && "KILL instruction must have only register operands"); O << ' ' << TRI->getName(op.getReg()) << (op.isDef() ? "<def>" : "<kill>"); } + OutStreamer.AddBlankLine(); } /// printLabel - This method prints a local label used by debug and /// exception handling tables. 
-void AsmPrinter::printLabel(const MachineInstr *MI) const { +void AsmPrinter::printLabelInst(const MachineInstr *MI) const { printLabel(MI->getOperand(0).getImm()); + OutStreamer.AddBlankLine(); } void AsmPrinter::printLabel(unsigned Id) const { @@ -1368,14 +1554,12 @@ bool AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, return true; } -MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BlockAddress *BA, - const char *Suffix) const { - return GetBlockAddressSymbol(BA->getFunction(), BA->getBasicBlock(), Suffix); +MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BlockAddress *BA) const { + return GetBlockAddressSymbol(BA->getFunction(), BA->getBasicBlock()); } MCSymbol *AsmPrinter::GetBlockAddressSymbol(const Function *F, - const BasicBlock *BB, - const char *Suffix) const { + const BasicBlock *BB) const { assert(BB->hasName() && "Address of anonymous basic block not supported yet!"); @@ -1389,19 +1573,12 @@ MCSymbol *AsmPrinter::GetBlockAddressSymbol(const Function *F, SmallString<60> NameResult; Mang->getNameWithPrefix(NameResult, StringRef("BA") + Twine((unsigned)FnName.size()) + - "_" + FnName.str() + "_" + BB->getName() + Suffix, + "_" + FnName.str() + "_" + BB->getName(), Mangler::Private); return OutContext.GetOrCreateSymbol(NameResult.str()); } -MCSymbol *AsmPrinter::GetMBBSymbol(unsigned MBBID) const { - SmallString<60> Name; - raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "BB" - << getFunctionNumber() << '_' << MBBID; - return OutContext.GetOrCreateSymbol(Name.str()); -} - /// GetCPISymbol - Return the symbol for the specified constant pool entry. MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const { SmallString<60> Name; @@ -1412,11 +1589,15 @@ MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const { /// GetJTISymbol - Return the symbol for the specified jump table entry. MCSymbol *AsmPrinter::GetJTISymbol(unsigned JTID, bool isLinkerPrivate) const { - const char *Prefix = isLinkerPrivate ? MAI->getLinkerPrivateGlobalPrefix() : - MAI->getPrivateGlobalPrefix(); + return MF->getJTISymbol(JTID, OutContext, isLinkerPrivate); +} + +/// GetJTSetSymbol - Return the symbol for the specified jump table .set +/// FIXME: privatize to AsmPrinter. +MCSymbol *AsmPrinter::GetJTSetSymbol(unsigned UID, unsigned MBBID) const { SmallString<60> Name; - raw_svector_ostream(Name) << Prefix << "JTI" << getFunctionNumber() << '_' - << JTID; + raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() + << getFunctionNumber() << '_' << UID << "_set_" << MBBID; return OutContext.GetOrCreateSymbol(Name.str()); } @@ -1476,7 +1657,7 @@ static void PrintChildLoopComment(raw_ostream &OS, const MachineLoop *Loop, } } -/// EmitComments - Pretty-print comments for basic blocks. +/// PrintBasicBlockLoopComments - Pretty-print comments for basic blocks. static void PrintBasicBlockLoopComments(const MachineBasicBlock &MBB, const MachineLoopInfo *LI, const AsmPrinter &AP) { @@ -1555,37 +1736,11 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const { PrintBasicBlockLoopComments(*MBB, LI, *this); } - OutStreamer.EmitLabel(GetMBBSymbol(MBB->getNumber())); + OutStreamer.EmitLabel(MBB->getSymbol(OutContext)); } } -/// printPICJumpTableSetLabel - This method prints a set label for the -/// specified MachineBasicBlock for a jumptable entry. 
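For orientation, the private-symbol helpers above produce names of this shape, assuming Darwin's "L" private-global prefix and function number 4 (the strings are conventions inferred from the code and comments above, not guarantees):

    // GetCPISymbol(7)        -> "LCPI4_7"       constant pool entry
    // GetJTISymbol(5)        -> "LJTI4_5"       jump table (via MF->getJTISymbol)
    // GetJTSetSymbol(5, 123) -> "L4_5_set_123"  .set helper for PIC jump tables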
-void AsmPrinter::printPICJumpTableSetLabel(unsigned uid, - const MachineBasicBlock *MBB) const { - if (!MAI->getSetDirective()) - return; - - O << MAI->getSetDirective() << ' ' << MAI->getPrivateGlobalPrefix() - << getFunctionNumber() << '_' << uid << "_set_" << MBB->getNumber() << ',' - << *GetMBBSymbol(MBB->getNumber()) - << '-' << *GetJTISymbol(uid) << '\n'; -} - -void AsmPrinter::printPICJumpTableSetLabel(unsigned uid, unsigned uid2, - const MachineBasicBlock *MBB) const { - if (!MAI->getSetDirective()) - return; - - O << MAI->getSetDirective() << ' ' << MAI->getPrivateGlobalPrefix() - << getFunctionNumber() << '_' << uid << '_' << uid2 - << "_set_" << MBB->getNumber() << ',' - << *GetMBBSymbol(MBB->getNumber()) - << '-' << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() - << '_' << uid << '_' << uid2 << '\n'; -} - -void AsmPrinter::printVisibility(MCSymbol *Sym, unsigned Visibility) const { +void AsmPrinter::EmitVisibility(MCSymbol *Sym, unsigned Visibility) const { MCSymbolAttr Attr = MCSA_Invalid; switch (Visibility) { @@ -1633,86 +1788,3 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) { return 0; } -/// EmitComments - Pretty-print comments for instructions -void AsmPrinter::EmitComments(const MachineInstr &MI) const { - if (!VerboseAsm) - return; - - bool Newline = false; - - if (!MI.getDebugLoc().isUnknown()) { - DILocation DLT = MF->getDILocation(MI.getDebugLoc()); - - // Print source line info. - O.PadToColumn(MAI->getCommentColumn()); - O << MAI->getCommentString() << ' '; - DIScope Scope = DLT.getScope(); - // Omit the directory, because it's likely to be long and uninteresting. - if (!Scope.isNull()) - O << Scope.getFilename(); - else - O << "<unknown>"; - O << ':' << DLT.getLineNumber(); - if (DLT.getColumnNumber() != 0) - O << ':' << DLT.getColumnNumber(); - Newline = true; - } - - // Check for spills and reloads - int FI; - - const MachineFrameInfo *FrameInfo = - MI.getParent()->getParent()->getFrameInfo(); - - // We assume a single instruction only has a spill or reload, not - // both. 
- const MachineMemOperand *MMO; - if (TM.getInstrInfo()->isLoadFromStackSlotPostFE(&MI, FI)) { - if (FrameInfo->isSpillSlotObjectIndex(FI)) { - MMO = *MI.memoperands_begin(); - if (Newline) O << '\n'; - O.PadToColumn(MAI->getCommentColumn()); - O << MAI->getCommentString() << ' ' << MMO->getSize() << "-byte Reload"; - Newline = true; - } - } - else if (TM.getInstrInfo()->hasLoadFromStackSlot(&MI, MMO, FI)) { - if (FrameInfo->isSpillSlotObjectIndex(FI)) { - if (Newline) O << '\n'; - O.PadToColumn(MAI->getCommentColumn()); - O << MAI->getCommentString() << ' ' - << MMO->getSize() << "-byte Folded Reload"; - Newline = true; - } - } - else if (TM.getInstrInfo()->isStoreToStackSlotPostFE(&MI, FI)) { - if (FrameInfo->isSpillSlotObjectIndex(FI)) { - MMO = *MI.memoperands_begin(); - if (Newline) O << '\n'; - O.PadToColumn(MAI->getCommentColumn()); - O << MAI->getCommentString() << ' ' << MMO->getSize() << "-byte Spill"; - Newline = true; - } - } - else if (TM.getInstrInfo()->hasStoreToStackSlot(&MI, MMO, FI)) { - if (FrameInfo->isSpillSlotObjectIndex(FI)) { - if (Newline) O << '\n'; - O.PadToColumn(MAI->getCommentColumn()); - O << MAI->getCommentString() << ' ' - << MMO->getSize() << "-byte Folded Spill"; - Newline = true; - } - } - - // Check for spill-induced copies - unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; - if (TM.getInstrInfo()->isMoveInstr(MI, SrcReg, DstReg, - SrcSubIdx, DstSubIdx)) { - if (MI.getAsmPrinterFlag(ReloadReuse)) { - if (Newline) O << '\n'; - O.PadToColumn(MAI->getCommentColumn()); - O << MAI->getCommentString() << " Reload Reuse"; - } - } -} - diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index 349e0ac..63360c0 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -313,6 +313,7 @@ void DIESectionOffset::EmitValue(DwarfPrinter *D, unsigned Form) const { D->EmitSectionOffset(Label.getTag(), Section.getTag(), Label.getNumber(), Section.getNumber(), IsSmall, IsEH, UseSet); + D->getAsm()->O << '\n'; // FIXME: Necesssary? } /// SizeOf - Determine size of delta value in bytes. diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 513987f..5093dd9 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -26,6 +26,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ValueHandle.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/Timer.h" #include "llvm/System/Path.h" @@ -166,7 +167,8 @@ public: class DbgScope { DbgScope *Parent; // Parent to this scope. DIDescriptor Desc; // Debug info descriptor for scope. - MDNode * InlinedAtLocation; // Location at which scope is inlined. + // Location at which this scope is inlined. + AssertingVH<MDNode> InlinedAtLocation; bool AbstractScope; // Abstract Scope unsigned StartLabelID; // Label ID of the beginning of scope. unsigned EndLabelID; // Label ID of the end of scope. @@ -189,7 +191,7 @@ public: void setParent(DbgScope *P) { Parent = P; } DIDescriptor getDesc() const { return Desc; } MDNode *getInlinedAt() const { - return dyn_cast_or_null<MDNode>(InlinedAtLocation); + return InlinedAtLocation; } MDNode *getScopeNode() const { return Desc.getNode(); } unsigned getStartLabelID() const { return StartLabelID; } @@ -616,7 +618,7 @@ void DwarfDebug::addComplexAddress(DbgVariable *&DV, DIE *Die, 1). Add the offset of the forwarding field. - 2). 
Follow that pointer to get the the real __Block_byref_x_VarName + 2). Follow that pointer to get the real __Block_byref_x_VarName struct to use (the real one may have been copied onto the heap). 3). Add the offset for the field VarName, to find the actual variable. @@ -937,7 +939,16 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { DIE *ElemDie = NULL; if (Element.getTag() == dwarf::DW_TAG_subprogram) ElemDie = createSubprogramDIE(DISubprogram(Element.getNode())); - else + else if (Element.getTag() == dwarf::DW_TAG_auto_variable) { + DIVariable DV(Element.getNode()); + ElemDie = new DIE(dwarf::DW_TAG_variable); + addString(ElemDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, + DV.getName()); + addType(ElemDie, DV.getType()); + addUInt(ElemDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + addUInt(ElemDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); + addSourceLine(ElemDie, &DV); + } else ElemDie = createMemberDIE(DIDerivedType(Element.getNode())); Buffer.addChild(ElemDie); } @@ -949,6 +960,11 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { if (RLang) addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class, dwarf::DW_FORM_data1, RLang); + + DICompositeType ContainingType = CTy.getContainingType(); + if (!ContainingType.isNull()) + addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, + getOrCreateTypeDIE(DIType(ContainingType.getNode()))); break; } default: @@ -959,7 +975,7 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { if (!Name.empty()) addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - if (Tag == dwarf::DW_TAG_enumeration_type || + if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type || Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) { // Add size if non-zero (derived types might be zero-sized.) if (Size) @@ -1107,7 +1123,26 @@ DIE *DwarfDebug::createMemberDIE(const DIDerivedType &DT) { // This is not a bitfield. addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits() >> 3); - addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie); + if (DT.getTag() == dwarf::DW_TAG_inheritance + && DT.isVirtual()) { + + // For C++, virtual base classes are not at fixed offset. Use following + // expression to extract appropriate offset from vtable. 
+        // BaseAddr = ObAddr + *((*ObAddr) - Offset)
+
+        DIEBlock *VBaseLocationDie = new DIEBlock();
+        addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_dup);
+        addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+        addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+        addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits());
+        addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_minus);
+        addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+        addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+
+        addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0,
+                 VBaseLocationDie);
+      } else
+        addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie);

   if (DT.isProtected())
     addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
@@ -1179,12 +1214,17 @@ DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) {
     if (SPTag == dwarf::DW_TAG_subroutine_type)
       for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
         DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
-        addType(Arg, DIType(Args.getElement(i).getNode()));
-        addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); // ??
+        DIType ATy = DIType(DIType(Args.getElement(i).getNode()));
+        addType(Arg, ATy);
+        if (ATy.isArtificial())
+          addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
         SPDie->addChild(Arg);
       }
   }

+  if (SP.isArtificial())
+    addUInt(SPDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
+
   // DW_TAG_inlined_subroutine may refer to this DIE.
   ModuleCU->insertDIE(SP.getNode(), SPDie);
   return SPDie;
 }
@@ -1289,7 +1329,13 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(MDNode *SPNode) {
   DIE *SPDie = ModuleCU->getDIE(SPNode);
   assert (SPDie && "Unable to find subprogram DIE!");
   DISubprogram SP(SPNode);
-  if (SP.isDefinition() && !SP.getContext().isCompileUnit()) {
+  // There is no need to generate a specification DIE for a function
+  // defined at the compile unit level. If a function is defined inside
+  // another function then gdb prefers the definition at the top level but
+  // does not expect a specification DIE in the parent function. So avoid
+  // creating a specification DIE for a function defined inside a function.
+  if (SP.isDefinition() && !SP.getContext().isCompileUnit()
+      && !SP.getContext().isSubprogram()) {
     addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);

     // Add arguments.
     DICompositeType SPTy = SP.getType();
@@ -1298,8 +1344,10 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(MDNode *SPNode) {
     if (SPTag == dwarf::DW_TAG_subroutine_type)
       for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
         DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
-        addType(Arg, DIType(Args.getElement(i).getNode()));
-        addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); // ??
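The virtual-base location block a few hunks above reads naturally as a stack program. Assuming the DWARF evaluator starts with the object address (ObAddr) pushed, the stack evolves like this:

    // DW_OP_dup              ObAddr ObAddr
    // DW_OP_deref            ObAddr VTbl          (load the vtable pointer)
    // DW_OP_constu <Offset>  ObAddr VTbl Off      (Off = DT.getOffsetInBits())
    // DW_OP_minus            ObAddr (VTbl-Off)
    // DW_OP_deref            ObAddr Delta         (load the stored base offset)
    // DW_OP_plus             ObAddr+Delta         (address of the virtual base)
    //
    // i.e. BaseAddr = ObAddr + *((*ObAddr) - Offset), matching the comment.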
+        DIType ATy = DIType(DIType(Args.getElement(i).getNode()));
+        addType(Arg, ATy);
+        if (ATy.isArtificial())
+          addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
         SPDie->addChild(Arg);
       }
     DIE *SPDeclDie = SPDie;
@@ -1308,7 +1356,7 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(MDNode *SPNode) {
                SPDeclDie);
     ModuleCU->addDie(SPDie);
   }
-
+
   addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
            DWLabel("func_begin", SubprogramCount));
   addLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
@@ -1471,6 +1519,9 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
     else
       addAddress(VariableDie, dwarf::DW_AT_location, Location);
   }
+
+  if (Tag == dwarf::DW_TAG_formal_parameter && VD.getType().isArtificial())
+    addUInt(VariableDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);

   DV->setDIE(VariableDie);
   return VariableDie;
@@ -1669,6 +1720,7 @@ void DwarfDebug::constructGlobalVariableDIE(MDNode *N) {
     addObjectLabel(Block, 0, dwarf::DW_FORM_udata,
                    Asm->GetGlobalValueSymbol(DI_GV.getGlobal()));
     addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block);
+    addUInt(VariableDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
     ModuleCU->addDie(VariableSpecDIE);
   } else {
     DIEBlock *Block = new DIEBlock();
@@ -1779,8 +1831,7 @@ void DwarfDebug::beginModule(Module *M, MachineModuleInfo *mmi) {
       FullPath.appendComponent(getSourceFileName(Id.second));
       assert(AppendOk && "Could not append filename to directory!");
       AppendOk = false;
-      Asm->EmitFile(i, FullPath.str());
-      Asm->O << '\n';
+      Asm->OutStreamer.EmitDwarfFileDirective(i, FullPath.str());
     }
   }
@@ -1986,7 +2037,7 @@ void DwarfDebug::createDbgScope(MDNode *Scope, MDNode *InlinedAt) {

 /// extractScopeInformation - Scan machine instructions in this function
 /// and collect DbgScopes. Return true if at least one scope was found.
-bool DwarfDebug::extractScopeInformation(MachineFunction *MF) {
+bool DwarfDebug::extractScopeInformation() {
   // If scope information was extracted using .dbg intrinsics then there is
   // no need to extract this information by scanning each instruction.
   if (!DbgScopeMap.empty())
@@ -2081,7 +2132,7 @@ bool DwarfDebug::extractScopeInformation(MachineFunction *MF) {

 /// beginFunction - Gather pre-function debug information. Assumes being
 /// emitted immediately after the function entry point.
-void DwarfDebug::beginFunction(MachineFunction *MF) {
+void DwarfDebug::beginFunction(const MachineFunction *MF) {
   this->MF = MF;
   if (!ShouldEmitDwarfDebug()) return;
@@ -2089,14 +2140,11 @@ void DwarfDebug::beginFunction(MachineFunction *MF) {
   if (TimePassesIsEnabled)
     DebugTimer->startTimer();

-  if (!extractScopeInformation(MF))
+  if (!extractScopeInformation())
     return;

   collectVariableInfo();

-  // Begin accumulating function debug information.
-  MMI->BeginFunction(MF);
-
   // Assumes in correct section after the entry point.
   EmitLabel("func_begin", ++SubprogramCount);

@@ -2123,7 +2171,7 @@ void DwarfDebug::beginFunction(MachineFunction *MF) {

 /// endFunction - Gather and emit post-function debug information.
 ///
-void DwarfDebug::endFunction(MachineFunction *MF) {
+void DwarfDebug::endFunction(const MachineFunction *MF) {
   if (!ShouldEmitDwarfDebug()) return;
   if (TimePassesIsEnabled)
@@ -2366,6 +2414,9 @@ void DwarfDebug::emitDIE(DIE *Die) {
     unsigned Form = AbbrevData[i].getForm();
     assert(Form && "Too many attributes for DIE (check abbreviation)");

+    if (Asm->VerboseAsm)
+      Asm->OutStreamer.AddComment(dwarf::AttributeString(Attr));
+
     switch (Attr) {
     case dwarf::DW_AT_sibling:
       Asm->EmitInt32(Die->getSiblingOffset());
@@ -2380,10 +2431,9 @@
     default:
      // Emit an attribute using the defined form.
      Values[i]->EmitValue(this, Form);
+      O << "\n"; // REMOVE This once all EmitValue impls emit their own newline.
      break;
    }
-
-    EOL(dwarf::AttributeString(Attr));
   }

   // Emit the DIE children if any.
@@ -2767,7 +2817,8 @@ void DwarfDebug::emitDebugPubTypes() {

   EmitLabel("pubtypes_begin", ModuleCU->getID());

-  Asm->EmitInt16(dwarf::DWARF_VERSION); EOL("DWARF Version");
+  if (Asm->VerboseAsm) Asm->OutStreamer.AddComment("DWARF Version");
+  Asm->EmitInt16(dwarf::DWARF_VERSION);

   EmitSectionOffset("info_begin", "section_info",
                     ModuleCU->getID(), 0, true, false);
@@ -2783,10 +2834,11 @@
     const char *Name = GI->getKeyData();
     DIE * Entity = GI->second;

-    Asm->EmitInt32(Entity->getOffset()); EOL("DIE offset");
+    if (Asm->VerboseAsm) Asm->OutStreamer.AddComment("DIE offset");
+    Asm->EmitInt32(Entity->getOffset());

     if (Asm->VerboseAsm) Asm->OutStreamer.AddComment("External Name");
-    Asm->OutStreamer.EmitBytes(StringRef(Name, strlen(Name)), 0);
+    Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1), 0);
   }

   Asm->EmitInt32(0); EOL("End Mark");
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index e723621..55baa92 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -103,7 +103,7 @@ class DwarfDebug : public DwarfPrinter {
   ///
   SmallVector<std::pair<unsigned, unsigned>, 8> SourceIds;

-  /// Lines - List of of source line correspondence.
+  /// Lines - List of source line correspondence.
   std::vector<SrcLineInfo> Lines;

   /// DIEValues - A list of all the unique values in use.
@@ -523,11 +523,11 @@ public:

   /// beginFunction - Gather pre-function debug information. Assumes being
   /// emitted immediately after the function entry point.
-  void beginFunction(MachineFunction *MF);
+  void beginFunction(const MachineFunction *MF);

   /// endFunction - Gather and emit post-function debug information.
   ///
-  void endFunction(MachineFunction *MF);
+  void endFunction(const MachineFunction *MF);

   /// recordSourceLine - Records location information and associates it with a
   /// label. Returns a unique label ID used to generate a label and provide
@@ -550,7 +550,7 @@ public:

   /// extractScopeInformation - Scan machine instructions in this function
   /// and collect DbgScopes. Return true if at least one scope was found.
-  bool extractScopeInformation(MachineFunction *MF);
+  bool extractScopeInformation();

   /// collectVariableInfo - Populate DbgScope entries with variables' info.
void collectVariableInfo(); diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp index 2ae16c0..c6c59f5 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp @@ -34,6 +34,7 @@ #include "llvm/Support/Timer.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Twine.h" using namespace llvm; DwarfException::DwarfException(raw_ostream &OS, AsmPrinter *A, @@ -49,26 +50,6 @@ DwarfException::~DwarfException() { delete ExceptionTimer; } -/// SizeOfEncodedValue - Return the size of the encoding in bytes. -unsigned DwarfException::SizeOfEncodedValue(unsigned Encoding) { - if (Encoding == dwarf::DW_EH_PE_omit) - return 0; - - switch (Encoding & 0x07) { - case dwarf::DW_EH_PE_absptr: - return TD->getPointerSize(); - case dwarf::DW_EH_PE_udata2: - return 2; - case dwarf::DW_EH_PE_udata4: - return 4; - case dwarf::DW_EH_PE_udata8: - return 8; - } - - assert(0 && "Invalid encoded value."); - return 0; -} - /// CreateLabelDiff - Emit a label and subtract it from the expression we /// already have. This is equivalent to emitting "foo - .", but we have to emit /// the label for "." directly. @@ -99,7 +80,7 @@ void DwarfException::EmitCIE(const Function *PersonalityFn, unsigned Index) { TD->getPointerSize() : -TD->getPointerSize(); const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); - + // Begin eh frame section. Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection()); @@ -127,30 +108,16 @@ void DwarfException::EmitCIE(const Function *PersonalityFn, unsigned Index) { // The personality presence indicates that language specific information will // show up in the eh frame. Find out how we are supposed to lower the // personality function reference: - const MCExpr *PersonalityRef = 0; - bool IsPersonalityIndirect = false, IsPersonalityPCRel = false; - if (PersonalityFn) { - // FIXME: HANDLE STATIC CODEGEN MODEL HERE. - - // In non-static mode, ask the object file how to represent this reference. - PersonalityRef = - TLOF.getSymbolForDwarfGlobalReference(PersonalityFn, Asm->Mang, - Asm->MMI, - IsPersonalityIndirect, - IsPersonalityPCRel); - } - - unsigned PerEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; - if (IsPersonalityIndirect) - PerEncoding |= dwarf::DW_EH_PE_indirect; - unsigned LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; - unsigned FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; + + unsigned LSDAEncoding = TLOF.getLSDAEncoding(); + unsigned FDEEncoding = TLOF.getFDEEncoding(); + unsigned PerEncoding = TLOF.getPersonalityEncoding(); char Augmentation[6] = { 0 }; unsigned AugmentationSize = 0; char *APtr = Augmentation + 1; - if (PersonalityRef) { + if (PersonalityFn) { // There is a personality function. *APtr++ = 'P'; AugmentationSize += 1 + SizeOfEncodedValue(PerEncoding); @@ -180,20 +147,19 @@ void DwarfException::EmitCIE(const Function *PersonalityFn, unsigned Index) { Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister(), true)); EOL("CIE Return Address Column"); - EmitULEB128(AugmentationSize, "Augmentation Size"); - EmitEncodingByte(PerEncoding, "Personality"); - - // If there is a personality, we need to indicate the function's location. 
- if (PersonalityRef) { - if (!IsPersonalityPCRel) - PersonalityRef = CreateLabelDiff(PersonalityRef, "personalityref_addr", - Index); - - O << MAI->getData32bitsDirective() << *PersonalityRef; - EOL("Personality"); + if (Augmentation[0]) { + EmitULEB128(AugmentationSize, "Augmentation Size"); - EmitEncodingByte(LSDAEncoding, "LSDA"); - EmitEncodingByte(FDEEncoding, "FDE"); + // If there is a personality, we need to indicate the function's location. + if (PersonalityFn) { + EmitEncodingByte(PerEncoding, "Personality"); + EmitReference(PersonalityFn, PerEncoding); + EOL("Personality"); + } + if (UsesLSDA[Index]) + EmitEncodingByte(LSDAEncoding, "LSDA"); + if (FDEEncoding != dwarf::DW_EH_PE_absptr) + EmitEncodingByte(FDEEncoding, "FDE"); } // Indicate locations of general callee saved registers in frame. @@ -215,8 +181,12 @@ void DwarfException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) { "Should not emit 'available externally' functions at all"); const Function *TheFunc = EHFrameInfo.function; + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); - Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getEHFrameSection()); + unsigned LSDAEncoding = TLOF.getLSDAEncoding(); + unsigned FDEEncoding = TLOF.getFDEEncoding(); + + Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection()); // Externally visible entry into the function's eh frame info. If the // corresponding function is static, this should not be externally visible. @@ -254,7 +224,8 @@ void DwarfException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) { // EH frame header. EmitDifference("eh_frame_end", EHFrameInfo.Number, - "eh_frame_begin", EHFrameInfo.Number, true); + "eh_frame_begin", EHFrameInfo.Number, + true); EOL("Length of Frame Information Entry"); EmitLabel("eh_frame_begin", EHFrameInfo.Number); @@ -265,33 +236,23 @@ void DwarfException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) { EOL("FDE CIE offset"); - EmitReference("eh_func_begin", EHFrameInfo.Number, true, true); + EmitReference("eh_func_begin", EHFrameInfo.Number, FDEEncoding); EOL("FDE initial location"); EmitDifference("eh_func_end", EHFrameInfo.Number, - "eh_func_begin", EHFrameInfo.Number, true); + "eh_func_begin", EHFrameInfo.Number, + SizeOfEncodedValue(FDEEncoding) == 4); EOL("FDE address range"); // If there is a personality and landing pads then point to the language // specific data area in the exception table.
if (MMI->getPersonalities()[0] != NULL) { + unsigned Size = SizeOfEncodedValue(LSDAEncoding); - if (Asm->TM.getLSDAEncoding() != DwarfLSDAEncoding::EightByte) { - EmitULEB128(4, "Augmentation size"); - - if (EHFrameInfo.hasLandingPads) - EmitReference("exception", EHFrameInfo.Number, true, true); - else - Asm->OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/); - } else { - EmitULEB128(TD->getPointerSize(), "Augmentation size"); - - if (EHFrameInfo.hasLandingPads) { - EmitReference("exception", EHFrameInfo.Number, true, false); - } else { - Asm->OutStreamer.EmitIntValue(0, TD->getPointerSize(), - 0/*addrspace*/); - } - } + EmitULEB128(Size, "Augmentation size"); + if (EHFrameInfo.hasLandingPads) + EmitReference("exception", EHFrameInfo.Number, LSDAEncoding); + else + Asm->OutStreamer.EmitIntValue(0, Size/*size*/, 0/*addrspace*/); EOL("Language Specific Data Area"); } else { @@ -406,20 +367,22 @@ ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads, if (NumShared < TypeIds.size()) { unsigned SizeAction = 0; - ActionEntry *PrevAction = 0; + unsigned PrevAction = (unsigned)-1; if (NumShared) { const unsigned SizePrevIds = PrevLPI->TypeIds.size(); assert(Actions.size()); - PrevAction = &Actions.back(); - SizeAction = MCAsmInfo::getSLEB128Size(PrevAction->NextAction) + - MCAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID); + PrevAction = Actions.size() - 1; + SizeAction = + MCAsmInfo::getSLEB128Size(Actions[PrevAction].NextAction) + + MCAsmInfo::getSLEB128Size(Actions[PrevAction].ValueForTypeID); for (unsigned j = NumShared; j != SizePrevIds; ++j) { + assert(PrevAction != (unsigned)-1 && "PrevAction is invalid!"); SizeAction -= - MCAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID); - SizeAction += -PrevAction->NextAction; - PrevAction = PrevAction->Previous; + MCAsmInfo::getSLEB128Size(Actions[PrevAction].ValueForTypeID); + SizeAction += -Actions[PrevAction].NextAction; + PrevAction = Actions[PrevAction].Previous; } } @@ -436,7 +399,7 @@ ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads, ActionEntry Action = { ValueForTypeID, NextAction, PrevAction }; Actions.push_back(Action); - PrevAction = &Actions.back(); + PrevAction = Actions.size() - 1; } // Record the first action of the landing pad site. @@ -446,7 +409,7 @@ ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads, // Information used when created the call-site table. The action record // field of the call site record is the offset of the first associated // action record, relative to the start of the actions table. This value is - // biased by 1 (1 in dicating the start of the actions table), and 0 + // biased by 1 (1 indicating the start of the actions table), and 0 // indicates that there are no actions. FirstActions.push_back(FirstAction); @@ -579,7 +542,16 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, } // Otherwise, create a new call-site. - CallSites.push_back(Site); + if (MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf) + CallSites.push_back(Site); + else { + // SjLj EH must maintain the call sites in the order assigned + // to them by the SjLjPrepare pass. + unsigned SiteNo = MMI->getCallSiteBeginLabel(BeginLabel); + if (CallSites.size() < SiteNo) + CallSites.resize(SiteNo); + CallSites[SiteNo - 1] = Site; + } PreviousIsInvoke = true; } else { // Create a gap. @@ -638,8 +610,7 @@ void DwarfException::EmitExceptionTable() { // landing pad site. 
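// The SjLj branch added to ComputeCallSiteTable above must honor the site
// numbering assigned by the SjLjPrepare pass rather than visitation order.
// A minimal standalone sketch of that placement; CallSiteEntrySketch is a
// hypothetical stand-in for the real CallSiteEntry:
#include <vector>

struct CallSiteEntrySketch {
  unsigned BeginLabel, EndLabel, PadLabel, Action;
};

// SiteNo is the 1-based index assigned by SjLjPrepare. The table grows on
// demand, and the site always lands at slot SiteNo-1 no matter what order
// the invokes are visited in.
static void placeSjLjCallSite(std::vector<CallSiteEntrySketch> &CallSites,
                              unsigned SiteNo,
                              const CallSiteEntrySketch &Site) {
  if (CallSites.size() < SiteNo)
    CallSites.resize(SiteNo);
  CallSites[SiteNo - 1] = Site;
}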
SmallVector<ActionEntry, 32> Actions; SmallVector<unsigned, 64> FirstActions; - unsigned SizeActions = ComputeActionsTable(LandingPads, Actions, - FirstActions); + unsigned SizeActions=ComputeActionsTable(LandingPads, Actions, FirstActions); // Invokes and nounwind calls have entries in PadMap (due to being bracketed // by try-range labels when lowered). Ordinary calls do not, so appropriate @@ -683,13 +654,13 @@ void DwarfException::EmitExceptionTable() { // Type infos. const MCSection *LSDASection = Asm->getObjFileLowering().getLSDASection(); - unsigned TTypeFormat; + unsigned TTypeEncoding; unsigned TypeFormatSize; if (!HaveTTData) { // For SjLj exceptions, if there is no TypeInfo, then we just explicitly say // that we're omitting that bit. - TTypeFormat = dwarf::DW_EH_PE_omit; + TTypeEncoding = dwarf::DW_EH_PE_omit; TypeFormatSize = SizeOfEncodedValue(dwarf::DW_EH_PE_absptr); } else { // Okay, we have actual filters or typeinfos to emit. As such, we need to @@ -719,14 +690,8 @@ void DwarfException::EmitExceptionTable() { // somewhere. This predicate should be moved to a shared location that is // in target-independent code. // - if (LSDASection->getKind().isWriteable() || - Asm->TM.getRelocationModel() == Reloc::Static) - TTypeFormat = dwarf::DW_EH_PE_absptr; - else - TTypeFormat = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | - dwarf::DW_EH_PE_sdata4; - - TypeFormatSize = SizeOfEncodedValue(TTypeFormat); + TTypeEncoding = Asm->getObjFileLowering().getTTypeEncoding(); + TypeFormatSize = SizeOfEncodedValue(TTypeEncoding); } // Begin the exception table. @@ -752,7 +717,7 @@ void DwarfException::EmitExceptionTable() { // does, instead output it before the table. unsigned SizeTypes = TypeInfos.size() * TypeFormatSize; unsigned TyOffset = sizeof(int8_t) + // Call site format - MCAsmInfo::getULEB128Size(SizeSites) + // Call-site table length + MCAsmInfo::getULEB128Size(SizeSites) + // Call site table length SizeSites + SizeActions + SizeTypes; unsigned TotalSize = sizeof(int8_t) + // LPStart format sizeof(int8_t) + // TType format @@ -777,7 +742,7 @@ void DwarfException::EmitExceptionTable() { // Emit the header. EmitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart"); - EmitEncodingByte(TTypeFormat, "@TType"); + EmitEncodingByte(TTypeEncoding, "@TType"); if (HaveTTData) EmitULEB128(TyOffset, "@TType base offset"); @@ -826,7 +791,7 @@ void DwarfException::EmitExceptionTable() { // Emit the landing pad call site table. EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site"); - EmitULEB128(SizeSites, "Call site table size"); + EmitULEB128(SizeSites, "Call site table length"); for (SmallVectorImpl<CallSiteEntry>::const_iterator I = CallSites.begin(), E = CallSites.end(); I != E; ++I) { @@ -859,13 +824,14 @@ void DwarfException::EmitExceptionTable() { // Offset of the landing pad, counted in 16-byte bundles relative to the // @LPStart address. - if (!S.PadLabel) + if (!S.PadLabel) { + Asm->OutStreamer.AddComment("Landing pad"); Asm->OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/); - else + } else { EmitSectionOffset("label", "eh_func_begin", S.PadLabel, SubprogramCount, true, true); - - EOL("Landing pad"); + EOL("Landing pad"); + } // Offset of the first associated action record, relative to the start of // the action table. This value is biased by 1 (1 indicates the start of @@ -875,38 +841,43 @@ void DwarfException::EmitExceptionTable() { } // Emit the Action Table. 
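// The action-table length arithmetic above leans on SLEB128/ULEB128 sizes.
// A standalone sketch of the underlying LEB128 size rules (not the
// MCAsmInfo::getULEB128Size/getSLEB128Size helpers themselves):
#include <cstdint>

// Unsigned LEB128: seven payload bits per byte, so the size is the number
// of 7-bit groups needed to hold the value.
static unsigned sizeULEB128(uint64_t Value) {
  unsigned Size = 0;
  do {
    Value >>= 7;
    ++Size;
  } while (Value != 0);
  return Size;
}

// Signed LEB128: stop once the remaining value is all sign bits and the
// sign bit of the last emitted byte agrees with it.
static unsigned sizeSLEB128(int64_t Value) {
  unsigned Size = 0;
  bool More;
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7; // arithmetic shift assumed, as in common implementations
    More = !((Value == 0 && (Byte & 0x40) == 0) ||
             (Value == -1 && (Byte & 0x40) != 0));
    ++Size;
  } while (More);
  return Size;
}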
+ if (Actions.size() != 0) EOL("-- Action Record Table --"); for (SmallVectorImpl<ActionEntry>::const_iterator I = Actions.begin(), E = Actions.end(); I != E; ++I) { const ActionEntry &Action = *I; + EOL("Action Record:"); // Type Filter // // Used by the runtime to match the type of the thrown exception to the // type of the catch clauses or the types in the exception specification. - EmitSLEB128(Action.ValueForTypeID, "TypeInfo index"); + EmitSLEB128(Action.ValueForTypeID, " TypeInfo index"); // Action Record // // Self-relative signed displacement in bytes of the next action record, // or 0 if there is no next action record. - EmitSLEB128(Action.NextAction, "Next action"); + EmitSLEB128(Action.NextAction, " Next action"); } // Emit the Catch TypeInfos. + if (!TypeInfos.empty()) EOL("-- Catch TypeInfos --"); for (std::vector<GlobalVariable *>::const_reverse_iterator I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) { const GlobalVariable *GV = *I; - PrintRelDirective(); - if (GV) - O << *Asm->GetGlobalValueSymbol(GV); - else + if (GV) { + EmitReference(GV, TTypeEncoding); + EOL("TypeInfo"); + } else { + PrintRelDirective(TTypeEncoding); O << "0x0"; - - EOL("TypeInfo"); + EOL(""); + } } // Emit the Exception Specifications. + if (!FilterIds.empty()) EOL("-- Filter IDs --"); for (std::vector<unsigned>::const_iterator I = FilterIds.begin(), E = FilterIds.end(); I < E; ++I) { unsigned TypeID = *I; @@ -943,7 +914,7 @@ void DwarfException::EndModule() { /// BeginFunction - Gather pre-function exception information. Assumes it's /// being emitted immediately after the function entry point. -void DwarfException::BeginFunction(MachineFunction *MF) { +void DwarfException::BeginFunction(const MachineFunction *MF) { if (!MMI || !MAI->doesSupportExceptionHandling()) return; if (TimePassesIsEnabled) diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h index 3921e91..3db1a00 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/lib/CodeGen/AsmPrinter/DwarfException.h @@ -76,9 +76,6 @@ class DwarfException : public DwarfPrinter { /// ExceptionTimer - Timer for the Dwarf exception writer. Timer *ExceptionTimer; - /// SizeOfEncodedValue - Return the size of the encoding in bytes. - unsigned SizeOfEncodedValue(unsigned Encoding); - /// EmitCIE - Emit a Common Information Entry (CIE). This holds information /// that is shared among many Frame Description Entries. There is at least /// one CIE in every non-empty .debug_frame section. @@ -103,7 +100,7 @@ class DwarfException : public DwarfPrinter { /// exception. If it matches then the exception and type id are passed /// on to the landing pad. Otherwise the next action is looked up. This /// chain is terminated with a next action of zero. If no type id is - /// found the the frame is unwound and handling continues. + /// found the frame is unwound and handling continues. /// 3. Type id table contains references to all the C++ typeinfo for all /// catches in the function. This table is reverse indexed, base 1. @@ -135,7 +132,7 @@ class DwarfException : public DwarfPrinter { struct ActionEntry { int ValueForTypeID; // The value to write - may not be equal to the type id. int NextAction; - struct ActionEntry *Previous; + unsigned Previous; }; /// CallSiteEntry - Structure describing an entry in the call-site table. @@ -197,7 +194,7 @@ public: /// BeginFunction - Gather pre-function exception information. Assumes being /// emitted immediately after the function entry point.
- void BeginFunction(MachineFunction *MF); + void BeginFunction(const MachineFunction *MF); /// EndFunction - Gather and emit post-function exception information. void EndFunction(); diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp index d204bba..1299d04 100644 --- a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// // // Emit general DWARF directives. -// +// //===----------------------------------------------------------------------===// #include "DwarfPrinter.h" @@ -18,13 +18,17 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/ADT/SmallString.h" using namespace llvm; DwarfPrinter::DwarfPrinter(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T, @@ -33,6 +37,26 @@ DwarfPrinter::DwarfPrinter(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T, RI(Asm->TM.getRegisterInfo()), M(NULL), MF(NULL), MMI(NULL), SubprogramCount(0), Flavor(flavor), SetCounter(1) {} +/// SizeOfEncodedValue - Return the size of the encoding in bytes. +unsigned DwarfPrinter::SizeOfEncodedValue(unsigned Encoding) const { + if (Encoding == dwarf::DW_EH_PE_omit) + return 0; + + switch (Encoding & 0x07) { + case dwarf::DW_EH_PE_absptr: + return TD->getPointerSize(); + case dwarf::DW_EH_PE_udata2: + return 2; + case dwarf::DW_EH_PE_udata4: + return 4; + case dwarf::DW_EH_PE_udata8: + return 8; + } + + assert(0 && "Invalid encoded value."); + return 0; +} + void DwarfPrinter::PrintRelDirective(bool Force32Bit, bool isInSection) const { if (isInSection && MAI->getDwarfSectionOffsetDirective()) O << MAI->getDwarfSectionOffsetDirective(); @@ -42,6 +66,14 @@ void DwarfPrinter::PrintRelDirective(bool Force32Bit, bool isInSection) const { O << MAI->getData64bitsDirective(); } +void DwarfPrinter::PrintRelDirective(unsigned Encoding) const { + unsigned Size = SizeOfEncodedValue(Encoding); + assert((Size == 4 || Size == 8) && "Do not support other types or rels!"); + + O << (Size == 4 ? + MAI->getData32bitsDirective() : MAI->getData64bitsDirective()); +} + /// EOL - Print a newline character to asm stream. If a comment is present /// then it will be printed first. Comments should not contain '\n'. void DwarfPrinter::EOL(const Twine &Comment) const { @@ -195,13 +227,38 @@ void DwarfPrinter::EmitReference(const MCSymbol *Sym, bool IsPCRelative, if (IsPCRelative) O << "-" << MAI->getPCSymbol(); } -/// EmitDifference - Emit the difference between two labels. Some assemblers do -/// not behave with absolute expressions with data directives, so there is an -/// option (needsSet) to use an intermediary set expression. 
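// SizeOfEncodedValue above keys off only the low three bits of a DW_EH_PE
// byte. A small standalone illustration, with constants written out per the
// DWARF exception-handling ABI (this is not LLVM's dwarf.h enum):
#include <cassert>

enum : unsigned {
  DW_EH_PE_absptr   = 0x00,
  DW_EH_PE_udata4   = 0x03,
  DW_EH_PE_sdata4   = 0x0B,
  DW_EH_PE_pcrel    = 0x10,
  DW_EH_PE_indirect = 0x80
};

int main() {
  // The application bits (pcrel, indirect) only change how the value is
  // interpreted; the low three format bits alone determine its size.
  unsigned Enc = DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;
  assert((Enc & 0x07) == (DW_EH_PE_udata4 & 0x07)); // both 4 bytes wide
  return 0;
}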
+void DwarfPrinter::EmitReference(const char *Tag, unsigned Number, + unsigned Encoding) const { + SmallString<64> Name; + raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() + << Tag << Number; + + MCSymbol *Sym = Asm->OutContext.GetOrCreateSymbol(Name.str()); + EmitReference(Sym, Encoding); +} + +void DwarfPrinter::EmitReference(const MCSymbol *Sym, unsigned Encoding) const { + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + + PrintRelDirective(Encoding); + O << *TLOF.getSymbolForDwarfReference(Sym, Asm->MMI, Encoding); +} + +void DwarfPrinter::EmitReference(const GlobalValue *GV, unsigned Encoding) const { + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + + PrintRelDirective(Encoding); + O << *TLOF.getSymbolForDwarfGlobalReference(GV, Asm->Mang, + Asm->MMI, Encoding); +} + +/// EmitDifference - Emit the difference between two labels. If this assembler +/// supports .set, we emit a .set of a temporary and then use it in the .word. void DwarfPrinter::EmitDifference(const char *TagHi, unsigned NumberHi, const char *TagLo, unsigned NumberLo, bool IsSmall) { - if (MAI->needsSet()) { + if (MAI->hasSetDirective()) { + // FIXME: switch to OutStreamer.EmitAssignment. O << "\t.set\t"; PrintLabelName("set", SetCounter, Flavor); O << ","; @@ -232,7 +289,8 @@ void DwarfPrinter::EmitSectionOffset(const char* Label, const char* Section, else printAbsolute = MAI->isAbsoluteDebugSectionOffsets(); - if (MAI->needsSet() && useSet) { + if (MAI->hasSetDirective() && useSet) { + // FIXME: switch to OutStreamer.EmitAssignment. O << "\t.set\t"; PrintLabelName("set", SetCounter, Flavor); O << ","; diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.h b/lib/CodeGen/AsmPrinter/DwarfPrinter.h index 86fe2ab..73de398 100644 --- a/lib/CodeGen/AsmPrinter/DwarfPrinter.h +++ b/lib/CodeGen/AsmPrinter/DwarfPrinter.h @@ -28,11 +28,14 @@ class Module; class MCAsmInfo; class TargetData; class TargetRegisterInfo; +class GlobalValue; class MCSymbol; class Twine; class DwarfPrinter { protected: + ~DwarfPrinter() {} + //===-------------------------------------------------------------==---===// // Core attributes used by the DWARF printer. // @@ -56,7 +59,7 @@ protected: Module *M; /// MF - Current machine function. - MachineFunction *MF; + const MachineFunction *MF; /// MMI - Collected machine module information. MachineModuleInfo *MMI; @@ -83,6 +86,10 @@ public: const MCAsmInfo *getMCAsmInfo() const { return MAI; } const TargetData *getTargetData() const { return TD; } + /// SizeOfEncodedValue - Return the size of the encoding in bytes. + unsigned SizeOfEncodedValue(unsigned Encoding) const; + + void PrintRelDirective(unsigned Encoding) const; void PrintRelDirective(bool Force32Bit = false, bool isInSection = false) const; @@ -138,9 +145,11 @@ public: void EmitReference(const MCSymbol *Sym, bool IsPCRelative = false, bool Force32Bit = false) const; - /// EmitDifference - Emit the difference between two labels. Some - /// assemblers do not behave with absolute expressions with data directives, - /// so there is an option (needsSet) to use an intermediary set expression. + void EmitReference(const char *Tag, unsigned Number, unsigned Encoding) const; + void EmitReference(const MCSymbol *Sym, unsigned Encoding) const; + void EmitReference(const GlobalValue *GV, unsigned Encoding) const; + + /// EmitDifference - Emit the difference between two labels.
void EmitDifference(const DWLabel &LabelHi, const DWLabel &LabelLo, bool IsSmall = false) { EmitDifference(LabelHi.getTag(), LabelHi.getNumber(), diff --git a/lib/CodeGen/AsmPrinter/DwarfWriter.cpp b/lib/CodeGen/AsmPrinter/DwarfWriter.cpp index dd8d88a..08e1bbc 100644 --- a/lib/CodeGen/AsmPrinter/DwarfWriter.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfWriter.cpp @@ -57,14 +57,14 @@ void DwarfWriter::EndModule() { /// BeginFunction - Gather pre-function debug information. Assumes being /// emitted immediately after the function entry point. -void DwarfWriter::BeginFunction(MachineFunction *MF) { +void DwarfWriter::BeginFunction(const MachineFunction *MF) { DE->BeginFunction(MF); DD->beginFunction(MF); } /// EndFunction - Gather and emit post-function debug information. /// -void DwarfWriter::EndFunction(MachineFunction *MF) { +void DwarfWriter::EndFunction(const MachineFunction *MF) { DD->endFunction(MF); DE->EndFunction(); diff --git a/lib/CodeGen/AsmPrinter/Makefile b/lib/CodeGen/AsmPrinter/Makefile index b0071d0..60aa6cb 100644 --- a/lib/CodeGen/AsmPrinter/Makefile +++ b/lib/CodeGen/AsmPrinter/Makefile @@ -9,6 +9,5 @@ LEVEL = ../../.. LIBRARYNAME = LLVMAsmPrinter -CXXFLAGS = -fno-rtti include $(LEVEL)/Makefile.common diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 92849d3..faf4d95 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -133,7 +133,7 @@ bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) { SmallSet<unsigned, 4> ImpDefRegs; MachineBasicBlock::iterator I = MBB->begin(); while (I != MBB->end()) { - if (I->getOpcode() != TargetInstrInfo::IMPLICIT_DEF) + if (!I->isImplicitDef()) break; unsigned Reg = I->getOperand(0).getReg(); ImpDefRegs.insert(Reg); @@ -206,53 +206,56 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, // See if any jump tables have become mergable or dead as the code generator // did its thing. MachineJumpTableInfo *JTI = MF.getJumpTableInfo(); + if (JTI == 0) { + delete RS; + return MadeChange; + } + const std::vector<MachineJumpTableEntry> &JTs = JTI->getJumpTables(); - if (!JTs.empty()) { - // Figure out how these jump tables should be merged. - std::vector<unsigned> JTMapping; - JTMapping.reserve(JTs.size()); - - // We always keep the 0th jump table. - JTMapping.push_back(0); - - // Scan the jump tables, seeing if there are any duplicates. Note that this - // is N^2, which should be fixed someday. - for (unsigned i = 1, e = JTs.size(); i != e; ++i) { - if (JTs[i].MBBs.empty()) - JTMapping.push_back(i); - else - JTMapping.push_back(JTI->getJumpTableIndex(JTs[i].MBBs)); - } - - // If a jump table was merge with another one, walk the function rewriting - // references to jump tables to reference the new JT ID's. Keep track of - // whether we see a jump table idx, if not, we can delete the JT. - BitVector JTIsLive(JTs.size()); - for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); - BB != E; ++BB) { - for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); - I != E; ++I) - for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) { - MachineOperand &Op = I->getOperand(op); - if (!Op.isJTI()) continue; - unsigned NewIdx = JTMapping[Op.getIndex()]; - Op.setIndex(NewIdx); - - // Remember that this JT is live. - JTIsLive.set(NewIdx); - } - } + // Figure out how these jump tables should be merged. + std::vector<unsigned> JTMapping; + JTMapping.reserve(JTs.size()); + + // We always keep the 0th jump table. 
+ JTMapping.push_back(0); + + // Scan the jump tables, seeing if there are any duplicates. Note that this + // is N^2, which should be fixed someday. + for (unsigned i = 1, e = JTs.size(); i != e; ++i) { + if (JTs[i].MBBs.empty()) + JTMapping.push_back(i); + else + JTMapping.push_back(JTI->getJumpTableIndex(JTs[i].MBBs)); + } - // Finally, remove dead jump tables. This happens either because the - // indirect jump was unreachable (and thus deleted) or because the jump - // table was merged with some other one. - for (unsigned i = 0, e = JTIsLive.size(); i != e; ++i) - if (!JTIsLive.test(i)) { - JTI->RemoveJumpTable(i); - MadeChange = true; + // If a jump table was merged with another one, walk the function rewriting + // references to jump tables to reference the new JT ID's. Keep track of + // whether we see a jump table idx, if not, we can delete the JT. + BitVector JTIsLive(JTs.size()); + for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); + BB != E; ++BB) { + for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); + I != E; ++I) + for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) { + MachineOperand &Op = I->getOperand(op); + if (!Op.isJTI()) continue; + unsigned NewIdx = JTMapping[Op.getIndex()]; + Op.setIndex(NewIdx); + + // Remember that this JT is live. + JTIsLive.set(NewIdx); } } + // Finally, remove dead jump tables. This happens either because the + // indirect jump was unreachable (and thus deleted) or because the jump + // table was merged with some other one. + for (unsigned i = 0, e = JTIsLive.size(); i != e; ++i) + if (!JTIsLive.test(i)) { + JTI->RemoveJumpTable(i); + MadeChange = true; + } + delete RS; return MadeChange; } @@ -337,7 +340,7 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, // relative order. This is untenable because normal compiler // optimizations (like this one) may reorder and/or merge these // directives. - I1->getOpcode() == TargetInstrInfo::INLINEASM) { + I1->isInlineAsm()) { ++I1; ++I2; break; } @@ -957,7 +960,8 @@ ReoptimizeBlock: } // If MBB was the target of a jump table, update jump tables to go to the // fallthrough instead. - MF.getJumpTableInfo()->ReplaceMBBInJumpTables(MBB, FallThrough); + if (MachineJumpTableInfo *MJTI = MF.getJumpTableInfo()) + MJTI->ReplaceMBBInJumpTables(MBB, FallThrough); MadeChange = true; } return MadeChange; @@ -1191,7 +1195,8 @@ ReoptimizeBlock: } // Change any jumptables to go to the new MBB.
- MF.getJumpTableInfo()->ReplaceMBBInJumpTables(MBB, CurTBB); + if (MachineJumpTableInfo *MJTI = MF.getJumpTableInfo()) + MJTI->ReplaceMBBInJumpTables(MBB, CurTBB); if (DidChange) { ++NumBranchOpts; MadeChange = true; diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 17072d3..62cf339 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -21,7 +21,6 @@ add_llvm_library(LLVMCodeGen LiveStackAnalysis.cpp LiveVariables.cpp LowerSubregs.cpp - MachOWriter.cpp MachineBasicBlock.cpp MachineDominators.cpp MachineFunction.cpp @@ -40,6 +39,7 @@ add_llvm_library(LLVMCodeGen ObjectCodeEmitter.cpp OcamlGC.cpp OptimizeExts.cpp + OptimizePHIs.cpp PHIElimination.cpp Passes.cpp PostRASchedulerList.cpp @@ -67,6 +67,7 @@ add_llvm_library(LLVMCodeGen StrongPHIElimination.cpp TailDuplication.cpp TargetInstrInfoImpl.cpp + TargetLoweringObjectFileImpl.cpp TwoAddressInstructionPass.cpp UnreachableBlockElim.cpp VirtRegMap.cpp diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp index b8ef219..2bedd04 100644 --- a/lib/CodeGen/CalcSpillWeights.cpp +++ b/lib/CodeGen/CalcSpillWeights.cpp @@ -20,8 +20,8 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" - using namespace llvm; char CalculateSpillWeights::ID = 0; @@ -58,10 +58,7 @@ bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &fn) { for (MachineBasicBlock::const_iterator mii = mbb->begin(), mie = mbb->end(); mii != mie; ++mii) { const MachineInstr *mi = mii; - if (tii->isIdentityCopy(*mi)) - continue; - - if (mi->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) + if (tii->isIdentityCopy(*mi) || mi->isImplicitDef() || mi->isDebugValue()) continue; for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp index 126700b..a13a310 100644 --- a/lib/CodeGen/CodePlacementOpt.cpp +++ b/lib/CodeGen/CodePlacementOpt.cpp @@ -106,7 +106,7 @@ bool CodePlacementOpt::HasAnalyzableTerminator(MachineBasicBlock *MBB) { // At the time of this writing, there are blocks which AnalyzeBranch // thinks end in single uncoditional branches, yet which have two CFG // successors. Code in this file is not prepared to reason about such things. - if (!MBB->empty() && MBB->back().getOpcode() == TargetInstrInfo::EH_LABEL) + if (!MBB->empty() && MBB->back().isEHLabel()) return false; // Aggressively handle return blocks and similar constructs. @@ -115,7 +115,7 @@ bool CodePlacementOpt::HasAnalyzableTerminator(MachineBasicBlock *MBB) { // Ask the target's AnalyzeBranch if it can handle this block. MachineBasicBlock *TBB = 0, *FBB = 0; SmallVector<MachineOperand, 4> Cond; - // Make the the terminator is understood. + // Make sure the terminator is understood. if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond)) return false; // Make sure we have the option of reversing the condition. 
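// The jump-table merging that BranchFolding now guards with a null check
// reduces to two steps: map each table to its first identical twin, then
// rewrite operands through the map. A standalone sketch under simplified
// types, where a table is just its list of targets (the real code uses
// MachineJumpTableInfo::getJumpTableIndex, which also finds duplicates):
#include <vector>

// Map each jump table to the first table with identical targets. Quadratic,
// as the comment in the patch concedes ("N^2, which should be fixed
// someday").
static std::vector<unsigned>
buildJTMapping(const std::vector<std::vector<int> > &JTs) {
  std::vector<unsigned> JTMapping;
  JTMapping.reserve(JTs.size());
  for (unsigned i = 0, e = JTs.size(); i != e; ++i) {
    unsigned Canon = i;
    for (unsigned j = 0; j != i; ++j)
      if (JTs[j] == JTs[i]) { Canon = j; break; }
    JTMapping.push_back(Canon);
  }
  return JTMapping;
}

// Every jump-table operand is then rewritten through JTMapping, and any
// table whose index is never seen afterwards can be removed as dead.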
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp index 0982eab..a215a19 100644 --- a/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "codegen-dce" #include "llvm/CodeGen/Passes.h" #include "llvm/Pass.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -19,8 +20,11 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/ADT/Statistic.h" using namespace llvm; +STATISTIC(NumDeletes, "Number of dead instructions deleted"); + namespace { class DeadMachineInstructionElim : public MachineFunctionPass { virtual bool runOnMachineFunction(MachineFunction &MF); @@ -51,7 +55,7 @@ FunctionPass *llvm::createDeadMachineInstructionElimPass() { bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { // Don't delete instructions with side effects. bool SawStore = false; - if (!MI->isSafeToMove(TII, SawStore, 0)) + if (!MI->isSafeToMove(TII, SawStore, 0) && !MI->isPHI()) return false; // Examine each operand. @@ -60,8 +64,8 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { if (MO.isReg() && MO.isDef()) { unsigned Reg = MO.getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg) ? - LivePhysRegs[Reg] : !MRI->use_empty(Reg)) { - // This def has a use. Don't delete the instruction! + LivePhysRegs[Reg] : !MRI->use_nodbg_empty(Reg)) { + // This def has a non-debug use. Don't delete the instruction! return false; } } @@ -110,8 +114,31 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { // If the instruction is dead, delete it! if (isDead(MI)) { DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << *MI); + // It is possible that some DBG_VALUE instructions refer to this + // instruction. Examine each def operand for such references; + // if found, mark the DBG_VALUE as undef (but don't delete it). + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + MachineRegisterInfo::use_iterator nextI; + for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg), + E = MRI->use_end(); I!=E; I=nextI) { + nextI = llvm::next(I); // I is invalidated by the setReg + MachineOperand& Use = I.getOperand(); + MachineInstr *UseMI = Use.getParent(); + if (UseMI==MI) + continue; + assert(Use.isDebug()); + UseMI->getOperand(0).setReg(0U); + } + } AnyChanges = true; MI->eraseFromParent(); + ++NumDeletes; MIE = MBB->rend(); // MII is now pointing to the next instruction to process, // so don't increment it. diff --git a/lib/CodeGen/ELFCodeEmitter.cpp b/lib/CodeGen/ELFCodeEmitter.cpp index 11a85a0..8416d3b 100644 --- a/lib/CodeGen/ELFCodeEmitter.cpp +++ b/lib/CodeGen/ELFCodeEmitter.cpp @@ -62,7 +62,8 @@ void ELFCodeEmitter::startFunction(MachineFunction &MF) { // They need to be emitted before the function because in some targets // the latter may reference JT or CP entry address.
emitConstantPool(MF.getConstantPool()); - emitJumpTables(MF.getJumpTableInfo()); + if (MF.getJumpTableInfo()) + emitJumpTables(MF.getJumpTableInfo()); } /// finishFunction - This callback is invoked after the function is completely @@ -84,7 +85,7 @@ bool ELFCodeEmitter::finishFunction(MachineFunction &MF) { // Patch up Jump Table Section relocations to use the real MBBs offsets // now that the MBB label offsets inside the function are known. - if (!MF.getJumpTableInfo()->isEmpty()) { + if (MF.getJumpTableInfo()) { ELFSection &JTSection = EW.getJumpTableSection(); for (std::vector<MachineRelocation>::iterator MRI = JTRelocations.begin(), MRE = JTRelocations.end(); MRI != MRE; ++MRI) { @@ -172,7 +173,7 @@ void ELFCodeEmitter::emitJumpTables(MachineJumpTableInfo *MJTI) { "PIC codegen not yet handled for elf jump tables!"); const TargetELFWriterInfo *TEW = TM.getELFWriterInfo(); - unsigned EntrySize = MJTI->getEntrySize(); + unsigned EntrySize = 4; //MJTI->getEntrySize(); // Get the ELF Section to emit the jump table ELFSection &JTSection = EW.getJumpTableSection(); diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp index de45e09..0979c04 100644 --- a/lib/CodeGen/ELFWriter.cpp +++ b/lib/CodeGen/ELFWriter.cpp @@ -37,7 +37,6 @@ #include "llvm/PassManager.h" #include "llvm/DerivedTypes.h" #include "llvm/CodeGen/BinaryObject.h" -#include "llvm/CodeGen/FileWriters.h" #include "llvm/CodeGen/MachineCodeEmitter.h" #include "llvm/CodeGen/ObjectCodeEmitter.h" #include "llvm/CodeGen/MachineCodeEmitter.h" @@ -59,15 +58,6 @@ using namespace llvm; char ELFWriter::ID = 0; -/// AddELFWriter - Add the ELF writer to the function pass manager -ObjectCodeEmitter *llvm::AddELFWriter(PassManagerBase &PM, - raw_ostream &O, - TargetMachine &TM) { - ELFWriter *EW = new ELFWriter(O, TM); - PM.add(EW); - return EW->getObjectCodeEmitter(); -} - //===----------------------------------------------------------------------===// // ELFWriter Implementation //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/ExactHazardRecognizer.cpp b/lib/CodeGen/ExactHazardRecognizer.cpp index 266c74c..61959bb 100644 --- a/lib/CodeGen/ExactHazardRecognizer.cpp +++ b/lib/CodeGen/ExactHazardRecognizer.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This implements a a hazard recognizer using the instructions itineraries +// This implements a hazard recognizer using the instructions itineraries // defined for the current target. // //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp index 79b2986..b5006fd 100644 --- a/lib/CodeGen/GCStrategy.cpp +++ b/lib/CodeGen/GCStrategy.cpp @@ -335,7 +335,7 @@ unsigned MachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB, unsigned Label = MMI->NextLabelID(); BuildMI(MBB, MI, DL, - TII->get(TargetInstrInfo::GC_LABEL)).addImm(Label); + TII->get(TargetOpcode::GC_LABEL)).addImm(Label); return Label; } diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp index 9997a48..87ab7ef 100644 --- a/lib/CodeGen/IntrinsicLowering.cpp +++ b/lib/CodeGen/IntrinsicLowering.cpp @@ -155,7 +155,7 @@ void IntrinsicLowering::AddPrototypes(Module &M) { /// LowerBSWAP - Emit the code to lower bswap of V before the specified /// instruction IP. 
static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) { - assert(V->getType()->isInteger() && "Can't bswap a non-integer type!"); + assert(V->getType()->isIntegerTy() && "Can't bswap a non-integer type!"); unsigned BitSize = V->getType()->getPrimitiveSizeInBits(); @@ -251,7 +251,7 @@ static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) { /// LowerCTPOP - Emit the code to lower ctpop of V before the specified /// instruction IP. static Value *LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP) { - assert(V->getType()->isInteger() && "Can't ctpop a non-integer type!"); + assert(V->getType()->isIntegerTy() && "Can't ctpop a non-integer type!"); static const uint64_t MaskValues[6] = { 0x5555555555555555ULL, 0x3333333333333333ULL, diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 837e184..278de02 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -14,16 +14,20 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/PassManager.h" #include "llvm/Pass.h" +#include "llvm/Analysis/Verifier.h" #include "llvm/Assembly/PrintModulePass.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/FileWriters.h" #include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/Target/TargetOptions.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Target/TargetData.h" #include "llvm/Target/TargetRegistry.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/OwningPtr.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FormattedStream.h" @@ -57,14 +61,24 @@ static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden, cl::desc("Print LLVM IR produced by the loop-reduce pass")); static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden, cl::desc("Print LLVM IR input to isel pass")); -static cl::opt<bool> PrintEmittedAsm("print-emitted-asm", cl::Hidden, - cl::desc("Dump emitter generated instructions as assembly")); static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden, cl::desc("Dump garbage collector data")); static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden, cl::desc("Verify generated machine code"), cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL)); +static cl::opt<cl::boolOrDefault> +AsmVerbose("asm-verbose", cl::desc("Add comments to directives."), + cl::init(cl::BOU_UNSET)); + +static bool getVerboseAsm() { + switch (AsmVerbose) { + default: + case cl::BOU_UNSET: return TargetMachine::getAsmVerbosityDefault(); + case cl::BOU_TRUE: return true; + case cl::BOU_FALSE: return false; + } +} // Enable or disable FastISel. Both options are needed, because // FastISel is enabled by default with -fast, and we wish to be @@ -98,139 +112,81 @@ LLVMTargetMachine::setCodeModelForStatic() { setCodeModel(CodeModel::Small); } -FileModel::Model -LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, - formatted_raw_ostream &Out, - CodeGenFileType FileType, - CodeGenOpt::Level OptLevel) { +bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, + formatted_raw_ostream &Out, + CodeGenFileType FileType, + CodeGenOpt::Level OptLevel) { // Add common CodeGen passes. 
if (addCommonCodeGenPasses(PM, OptLevel)) - return FileModel::Error; + return true; + OwningPtr<MCContext> Context(new MCContext()); + OwningPtr<MCStreamer> AsmStreamer; + + formatted_raw_ostream *LegacyOutput; switch (FileType) { - default: + default: return true; + case CGFT_AssemblyFile: { + const MCAsmInfo &MAI = *getMCAsmInfo(); + MCInstPrinter *InstPrinter = + getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI, Out); + AsmStreamer.reset(createAsmStreamer(*Context, Out, MAI, + getTargetData()->isLittleEndian(), + getVerboseAsm(), InstPrinter, + /*codeemitter*/0)); + // Set the AsmPrinter's "O" to the output file. + LegacyOutput = &Out; break; - case TargetMachine::AssemblyFile: - if (addAssemblyEmitter(PM, OptLevel, getAsmVerbosityDefault(), Out)) - return FileModel::Error; - return FileModel::AsmFile; - case TargetMachine::ObjectFile: - if (!addObjectFileEmitter(PM, OptLevel, Out)) - return FileModel::MachOFile; - else if (getELFWriterInfo()) - return FileModel::ElfFile; } - return FileModel::Error; -} - -bool LLVMTargetMachine::addAssemblyEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - bool Verbose, - formatted_raw_ostream &Out) { + case CGFT_ObjectFile: { + // Create the code emitter for the target if it exists. If not, .o file + // emission fails. + MCCodeEmitter *MCE = getTarget().createCodeEmitter(*this, *Context); + if (MCE == 0) + return true; + + AsmStreamer.reset(createMachOStreamer(*Context, Out, MCE)); + + // Any output to the asmprinter's "O" stream is bad and needs to be fixed, + // force it to come out on stderr. + // FIXME: this is horrible and leaks, eventually remove the raw_ostream from + // asmprinter. + LegacyOutput = new formatted_raw_ostream(errs()); + break; + } + case CGFT_Null: + // The Null output is intended for performance analysis and testing, + // not for real users. + AsmStreamer.reset(createNullStreamer(*Context)); + // Any output to the asmprinter's "O" stream is bad and needs to be fixed, + // force it to come out on stderr. + // FIXME: this is horrible and leaks, eventually remove the raw_ostream from + // asmprinter. + LegacyOutput = new formatted_raw_ostream(errs()); + break; + } + + // Create the AsmPrinter, which takes ownership of Context and AsmStreamer + // if successful. FunctionPass *Printer = - getTarget().createAsmPrinter(Out, *this, getMCAsmInfo(), Verbose); - if (!Printer) - return true; - - PM.add(Printer); - return false; -} - -bool LLVMTargetMachine::addObjectFileEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - formatted_raw_ostream &Out) { - MCCodeEmitter *Emitter = getTarget().createCodeEmitter(*this); - if (!Emitter) + getTarget().createAsmPrinter(*LegacyOutput, *this, *Context, *AsmStreamer, + getMCAsmInfo()); + if (Printer == 0) return true; - PM.add(createMachOWriter(Out, *this, getMCAsmInfo(), Emitter)); - return false; -} - -/// addPassesToEmitFileFinish - If the passes to emit the specified file had to -/// be split up (e.g., to add an object writer pass), this method can be used to -/// finish up adding passes to emit the file, if necessary. -bool LLVMTargetMachine::addPassesToEmitFileFinish(PassManagerBase &PM, - MachineCodeEmitter *MCE, - CodeGenOpt::Level OptLevel) { - // Make sure the code model is set. - setCodeModelForStatic(); + // If successful, createAsmPrinter took ownership of AsmStreamer and Context.
+ Context.take(); AsmStreamer.take(); - if (MCE) - addSimpleCodeEmitter(PM, OptLevel, *MCE); - if (PrintEmittedAsm) - addAssemblyEmitter(PM, OptLevel, true, ferrs()); - - PM.add(createGCInfoDeleter()); - - return false; // success! -} - -/// addPassesToEmitFileFinish - If the passes to emit the specified file had to -/// be split up (e.g., to add an object writer pass), this method can be used to -/// finish up adding passes to emit the file, if necessary. -bool LLVMTargetMachine::addPassesToEmitFileFinish(PassManagerBase &PM, - JITCodeEmitter *JCE, - CodeGenOpt::Level OptLevel) { - // Make sure the code model is set. - setCodeModelForJIT(); + PM.add(Printer); - if (JCE) - addSimpleCodeEmitter(PM, OptLevel, *JCE); - if (PrintEmittedAsm) - addAssemblyEmitter(PM, OptLevel, true, ferrs()); - - PM.add(createGCInfoDeleter()); - - return false; // success! -} - -/// addPassesToEmitFileFinish - If the passes to emit the specified file had to -/// be split up (e.g., to add an object writer pass), this method can be used to -/// finish up adding passes to emit the file, if necessary. -bool LLVMTargetMachine::addPassesToEmitFileFinish(PassManagerBase &PM, - ObjectCodeEmitter *OCE, - CodeGenOpt::Level OptLevel) { // Make sure the code model is set. setCodeModelForStatic(); - - if (OCE) - addSimpleCodeEmitter(PM, OptLevel, *OCE); - if (PrintEmittedAsm) - addAssemblyEmitter(PM, OptLevel, true, ferrs()); - - PM.add(createGCInfoDeleter()); - - return false; // success! -} - -/// addPassesToEmitMachineCode - Add passes to the specified pass manager to -/// get machine code emitted. This uses a MachineCodeEmitter object to handle -/// actually outputting the machine code and resolving things like the address -/// of functions. This method should returns true if machine code emission is -/// not supported. -/// -bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, - MachineCodeEmitter &MCE, - CodeGenOpt::Level OptLevel) { - // Make sure the code model is set. - setCodeModelForJIT(); - - // Add common CodeGen passes. - if (addCommonCodeGenPasses(PM, OptLevel)) - return true; - - addCodeEmitter(PM, OptLevel, MCE); - if (PrintEmittedAsm) - addAssemblyEmitter(PM, OptLevel, true, ferrs()); - - PM.add(createGCInfoDeleter()); - - return false; // success! + return false; } /// addPassesToEmitMachineCode - Add passes to the specified pass manager to -/// get machine code emitted. This uses a MachineCodeEmitter object to handle +/// get machine code emitted. This uses a JITCodeEmitter object to handle /// actually outputting the machine code and resolving things like the address /// of functions. This method should return true if machine code emission is /// not supported. @@ -246,9 +202,6 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, return true; addCodeEmitter(PM, OptLevel, JCE); - if (PrintEmittedAsm) - addAssemblyEmitter(PM, OptLevel, true, ferrs()); - PM.add(createGCInfoDeleter()); return false; // success!
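// The -asm-verbose plumbing above resolves a tri-state command-line value
// against a target default. A minimal sketch of the same pattern with
// hypothetical names (the real code uses cl::boolOrDefault and
// TargetMachine::getAsmVerbosityDefault):

// Tri-state: the flag may be explicitly true, explicitly false, or unset.
enum BoolOrDefault { BOU_UNSET, BOU_TRUE, BOU_FALSE };

static bool resolveVerboseAsm(BoolOrDefault Flag, bool TargetDefault) {
  switch (Flag) {
  case BOU_TRUE:  return true;          // user asked for comments
  case BOU_FALSE: return false;         // user suppressed comments
  case BOU_UNSET:
  default:        return TargetDefault; // fall back to the target's choice
  }
}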
@@ -282,6 +235,9 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, PM.add(createLoopStrengthReducePass(getTargetLowering())); if (PrintLSR) PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs())); +#ifndef NDEBUG + PM.add(createVerifierPass()); +#endif } // Turn exception handling constructs into something the code generators can @@ -339,6 +295,16 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, printAndVerify(PM, "After Instruction Selection", /* allowDoubleDefs= */ true); + // Optimize PHIs before DCE: removing dead PHI cycles may make more + // instructions dead. + if (OptLevel != CodeGenOpt::None) + PM.add(createOptimizePHIsPass()); + + // Delete dead machine instructions regardless of optimization level. + PM.add(createDeadMachineInstructionElimPass()); + printAndVerify(PM, "After codegen DCE pass", + /* allowDoubleDefs= */ true); + if (OptLevel != CodeGenOpt::None) { PM.add(createOptimizeExtsPass()); if (!DisableMachineLICM) diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 8746bf9..f6bf433 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -140,7 +140,7 @@ void LiveIntervals::printInstrs(raw_ostream &OS) const { << ":\t\t# derived from " << mbbi->getName() << "\n"; for (MachineBasicBlock::iterator mii = mbbi->begin(), mie = mbbi->end(); mii != mie; ++mii) { - if (mii->getOpcode()==TargetInstrInfo::DEBUG_VALUE) + if (mii->isDebugValue()) OS << SlotIndex::getEmptyKey() << '\t' << *mii; else OS << getInstructionIndex(mii) << '\t' << *mii; @@ -288,9 +288,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, VNInfo *ValNo; MachineInstr *CopyMI = NULL; unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (mi->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG || - mi->getOpcode() == TargetInstrInfo::INSERT_SUBREG || - mi->getOpcode() == TargetInstrInfo::SUBREG_TO_REG || + if (mi->isExtractSubreg() || mi->isInsertSubreg() || mi->isSubregToReg() || tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg)) CopyMI = mi; // Earlyclobbers move back one. @@ -460,9 +458,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, VNInfo *ValNo; MachineInstr *CopyMI = NULL; unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (mi->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG || - mi->getOpcode() == TargetInstrInfo::INSERT_SUBREG || - mi->getOpcode() == TargetInstrInfo::SUBREG_TO_REG || + if (mi->isExtractSubreg() || mi->isInsertSubreg() || mi->isSubregToReg()|| tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg)) CopyMI = mi; ValNo = interval.getNextValue(defIndex, CopyMI, true, VNInfoAllocator); @@ -516,6 +512,8 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, baseIndex = baseIndex.getNextIndex(); while (++mi != MBB->end()) { + if (mi->isDebugValue()) + continue; if (getInstructionFromIndex(baseIndex) == 0) baseIndex = indexes_->getNextNonNullIndex(baseIndex); @@ -531,8 +529,8 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, end = baseIndex.getDefIndex(); } else { // Another instruction redefines the register before it is ever read. - // Then the register is essentially dead at the instruction that defines - // it. Hence its interval is: + // Then the register is essentially dead at the instruction that + // defines it. 
Hence its interval is: // [defSlot(def), defSlot(def)+1) DEBUG(dbgs() << " dead"); end = start.getStoreIndex(); @@ -577,9 +575,7 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB, else if (allocatableRegs_[MO.getReg()]) { MachineInstr *CopyMI = NULL; unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG || - MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG || - MI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG || + if (MI->isExtractSubreg() || MI->isInsertSubreg() || MI->isSubregToReg() || tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg)) CopyMI = MI; handlePhysicalRegisterDef(MBB, MI, MIIdx, MO, @@ -612,8 +608,16 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, SlotIndex end = baseIndex; bool SeenDefUse = false; - - while (mi != MBB->end()) { + + MachineBasicBlock::iterator E = MBB->end(); + while (mi != E) { + if (mi->isDebugValue()) { + ++mi; + if (mi != E && !mi->isDebugValue()) { + baseIndex = indexes_->getNextNonNullIndex(baseIndex); + } + continue; + } if (mi->killsRegister(interval.reg, tri_)) { DEBUG(dbgs() << " killed"); end = baseIndex.getDefIndex(); @@ -631,7 +635,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, } ++mi; - if (mi != MBB->end()) { + if (mi != E && !mi->isDebugValue()) { baseIndex = indexes_->getNextNonNullIndex(baseIndex); } } @@ -671,6 +675,9 @@ void LiveIntervals::computeIntervals() { for (MachineFunction::iterator MBBI = mf_->begin(), E = mf_->end(); MBBI != E; ++MBBI) { MachineBasicBlock *MBB = MBBI; + if (MBB->empty()) + continue; + // Track the index of the current machine instr. SlotIndex MIIndex = getMBBStartIdx(MBB); DEBUG(dbgs() << MBB->getName() << ":\n"); @@ -693,7 +700,7 @@ void LiveIntervals::computeIntervals() { for (MachineBasicBlock::iterator MI = MBB->begin(), miEnd = MBB->end(); MI != miEnd; ++MI) { DEBUG(dbgs() << MIIndex << "\t" << *MI); - if (MI->getOpcode()==TargetInstrInfo::DEBUG_VALUE) + if (MI->isDebugValue()) continue; // Handle defs. @@ -742,7 +749,7 @@ unsigned LiveIntervals::getVNInfoSourceReg(const VNInfo *VNI) const { if (!VNI->getCopy()) return 0; - if (VNI->getCopy()->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) { + if (VNI->getCopy()->isExtractSubreg()) { // If it's extracting out of a physical register, return the sub-register. unsigned Reg = VNI->getCopy()->getOperand(1).getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { @@ -756,8 +763,8 @@ unsigned LiveIntervals::getVNInfoSourceReg(const VNInfo *VNI) const { Reg = tri_->getSubReg(Reg, VNI->getCopy()->getOperand(2).getImm()); } return Reg; - } else if (VNI->getCopy()->getOpcode() == TargetInstrInfo::INSERT_SUBREG || - VNI->getCopy()->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) + } else if (VNI->getCopy()->isInsertSubreg() || + VNI->getCopy()->isSubregToReg()) return VNI->getCopy()->getOperand(2).getReg(); unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; @@ -919,7 +926,7 @@ bool LiveIntervals::tryFoldMemoryOperand(MachineInstr* &MI, SmallVector<unsigned, 2> &Ops, bool isSS, int Slot, unsigned Reg) { // If it is an implicit def instruction, just delete it. - if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) { + if (MI->isImplicitDef()) { RemoveMachineInstrFromMaps(MI); vrm.RemoveMachineInstrFromMaps(MI); MI->eraseFromParent(); @@ -1059,7 +1066,7 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, // If this is the rematerializable definition MI itself and // all of its uses are rematerialized, simply delete it. 
if (MI == ReMatOrigDefMI && CanDelete) { - DEBUG(dbgs() << "\t\t\t\tErasing re-materlizable def: " + DEBUG(dbgs() << "\t\t\t\tErasing re-materializable def: " << MI << '\n'); RemoveMachineInstrFromMaps(MI); vrm.RemoveMachineInstrFromMaps(MI); @@ -1302,6 +1309,12 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, MachineInstr *MI = &*ri; MachineOperand &O = ri.getOperand(); ++ri; + if (MI->isDebugValue()) { + // Remove debug info for now. + O.setReg(0U); + DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI); + continue; + } assert(!O.isImplicit() && "Spilling register that's used as implicit use?"); SlotIndex index = getInstructionIndex(MI); if (index < start || index >= end) @@ -1525,7 +1538,7 @@ LiveIntervals::handleSpilledImpDefs(const LiveInterval &li, VirtRegMap &vrm, MachineInstr *MI = &*ri; ++ri; if (O.isDef()) { - assert(MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF && + assert(MI->isImplicitDef() && "Register def was not rewritten?"); RemoveMachineInstrFromMaps(MI); vrm.RemoveMachineInstrFromMaps(MI); @@ -2056,7 +2069,7 @@ bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li, std::string msg; raw_string_ostream Msg(msg); Msg << "Ran out of registers during register allocation!"; - if (MI->getOpcode() == TargetInstrInfo::INLINEASM) { + if (MI->isInlineAsm()) { Msg << "\nPlease check your inline asm statement for invalid " << "constraints:\n"; MI->print(Msg, tm_); diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index b44a220..8a124dc 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -543,6 +543,8 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ++I) { MachineInstr *MI = I; + if (MI->isDebugValue()) + continue; DistanceMap.insert(std::make_pair(MI, Dist++)); // Process all of the operands of the instruction... @@ -550,7 +552,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { // Unless it is a PHI node. In this case, ONLY process the DEF, not any // of the uses. They will be handled in other basic blocks. - if (MI->getOpcode() == TargetInstrInfo::PHI) + if (MI->isPHI()) NumOperandsToProcess = 1; SmallVector<unsigned, 4> UseRegs; @@ -692,7 +694,7 @@ void LiveVariables::analyzePHINodes(const MachineFunction& Fn) { for (MachineFunction::const_iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end(); - BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI) + BBI != BBE && BBI->isPHI(); ++BBI) for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) PHIVarInfo[BBI->getOperand(i + 1).getMBB()->getNumber()] .push_back(BBI->getOperand(i).getReg()); @@ -771,8 +773,7 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB, // All registers used by PHI nodes in SuccBB must be live through BB. 
for (MachineBasicBlock::const_iterator BBI = SuccBB->begin(), - BBE = SuccBB->end(); - BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI) + BBE = SuccBB->end(); BBI != BBE && BBI->isPHI(); ++BBI) for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) if (BBI->getOperand(i+1).getMBB() == BB) getVarInfo(BBI->getOperand(i).getReg()).AliveBlocks.set(NumNew); diff --git a/lib/CodeGen/LowerSubregs.cpp b/lib/CodeGen/LowerSubregs.cpp index 1121d9b..b4ef648 100644 --- a/lib/CodeGen/LowerSubregs.cpp +++ b/lib/CodeGen/LowerSubregs.cpp @@ -129,7 +129,7 @@ bool LowerSubregsInstructionPass::LowerExtract(MachineInstr *MI) { if (MI->getOperand(1).isKill()) { // We must make sure the super-register gets killed. Replace the // instruction with KILL. - MI->setDesc(TII->get(TargetInstrInfo::KILL)); + MI->setDesc(TII->get(TargetOpcode::KILL)); MI->RemoveOperand(2); // SubIdx DEBUG(dbgs() << "subreg: replace by: " << *MI); return true; @@ -242,7 +242,7 @@ bool LowerSubregsInstructionPass::LowerInsert(MachineInstr *MI) { // <undef>, we need to make sure it is alive by inserting a KILL if (MI->getOperand(1).isUndef() && !MI->getOperand(0).isDead()) { MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), - TII->get(TargetInstrInfo::KILL), DstReg); + TII->get(TargetOpcode::KILL), DstReg); if (MI->getOperand(2).isUndef()) MIB.addReg(InsReg, RegState::Undef); else @@ -260,7 +260,7 @@ bool LowerSubregsInstructionPass::LowerInsert(MachineInstr *MI) { // If the source register being inserted is undef, then this becomes a // KILL. BuildMI(*MBB, MI, MI->getDebugLoc(), - TII->get(TargetInstrInfo::KILL), DstSubReg); + TII->get(TargetOpcode::KILL), DstSubReg); else { bool Emitted = TII->copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1); (void)Emitted; @@ -314,11 +314,11 @@ bool LowerSubregsInstructionPass::runOnMachineFunction(MachineFunction &MF) { mi != me;) { MachineBasicBlock::iterator nmi = llvm::next(mi); MachineInstr *MI = mi; - if (MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) { + if (MI->isExtractSubreg()) { MadeChange |= LowerExtract(MI); - } else if (MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG) { + } else if (MI->isInsertSubreg()) { MadeChange |= LowerInsert(MI); - } else if (MI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) { + } else if (MI->isSubregToReg()) { MadeChange |= LowerSubregToReg(MI); } mi = nmi; diff --git a/lib/CodeGen/MachOWriter.cpp b/lib/CodeGen/MachOWriter.cpp deleted file mode 100644 index e8bbe21..0000000 --- a/lib/CodeGen/MachOWriter.cpp +++ /dev/null @@ -1,125 +0,0 @@ -//===-- MachOWriter.cpp - Target-independent Mach-O Writer code -----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the target-independent Mach-O writer. 
This file writes -// out the Mach-O file in the following order: -// -// #1 FatHeader (universal-only) -// #2 FatArch (universal-only, 1 per universal arch) -// Per arch: -// #3 Header -// #4 Load Commands -// #5 Sections -// #6 Relocations -// #7 Symbols -// #8 Strings -// -//===----------------------------------------------------------------------===// - -#include "MachOWriter.h" -#include "llvm/Function.h" -#include "llvm/CodeGen/FileWriters.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCCodeEmitter.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FormattedStream.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -using namespace llvm; - -namespace llvm { -MachineFunctionPass *createMachOWriter(formatted_raw_ostream &O, - TargetMachine &TM, - const MCAsmInfo *T, - MCCodeEmitter *MCE) { - return new MachOWriter(O, TM, T, MCE); -} -} - -//===----------------------------------------------------------------------===// -// MachOWriter Implementation -//===----------------------------------------------------------------------===// - -char MachOWriter::ID = 0; - -MachOWriter::MachOWriter(formatted_raw_ostream &o, TargetMachine &tm, - const MCAsmInfo *T, MCCodeEmitter *MCE) - : MachineFunctionPass(&ID), O(o), TM(tm), MAI(T), MCCE(MCE), - OutContext(*new MCContext()), - OutStreamer(*createMachOStreamer(OutContext, O, MCCE)) { -} - -MachOWriter::~MachOWriter() { - delete &OutStreamer; - delete &OutContext; - delete MCCE; -} - -bool MachOWriter::doInitialization(Module &M) { - // Initialize TargetLoweringObjectFile. - TM.getTargetLowering()->getObjFileLowering().Initialize(OutContext, TM); - - return false; -} - -/// doFinalization - Now that the module has been completely processed, emit -/// the Mach-O file to 'O'. -bool MachOWriter::doFinalization(Module &M) { - OutStreamer.Finish(); - return false; -} - -bool MachOWriter::runOnMachineFunction(MachineFunction &MF) { - const Function *F = MF.getFunction(); - TargetLoweringObjectFile &TLOF = TM.getTargetLowering()->getObjFileLowering(); - const MCSection *S = TLOF.SectionForGlobal(F, Mang, TM); - OutStreamer.SwitchSection(S); - - for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); - I != E; ++I) { - // Print a label for the basic block. - for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); - II != IE; ++II) { - const MachineInstr *MI = II; - MCInst OutMI; - OutMI.setOpcode(MI->getOpcode()); - - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - MCOperand MCOp; - - switch (MO.getType()) { - default: - MI->dump(); - llvm_unreachable("unknown operand type"); - case MachineOperand::MO_Register: - // Ignore all implicit register operands. 
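That operand switch was the entire lowering story for this writer; restated as a standalone helper (hypothetical name, same two supported operand kinds as the deleted code):

static bool lowerToMCOperand(const MachineOperand &MO, MCOperand &MCOp) {
  switch (MO.getType()) {
  case MachineOperand::MO_Register:
    if (MO.isImplicit())
      return false;                       // implicit register operands are dropped
    MCOp = MCOperand::CreateReg(MO.getReg());
    return true;
  case MachineOperand::MO_Immediate:
    MCOp = MCOperand::CreateImm(MO.getImm());
    return true;
  default:
    return false;                         // anything else was unsupported here
  }
}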
- if (MO.isImplicit()) continue; - MCOp = MCOperand::CreateReg(MO.getReg()); - break; - case MachineOperand::MO_Immediate: - MCOp = MCOperand::CreateImm(MO.getImm()); - break; - } - OutMI.addOperand(MCOp); - } - - OutStreamer.EmitInstruction(OutMI); - } - } - - return false; -} diff --git a/lib/CodeGen/MachOWriter.h b/lib/CodeGen/MachOWriter.h deleted file mode 100644 index 2e7e67d..0000000 --- a/lib/CodeGen/MachOWriter.h +++ /dev/null @@ -1,88 +0,0 @@ -//=== MachOWriter.h - Target-independent Mach-O writer support --*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the MachOWriter class. -// -//===----------------------------------------------------------------------===// - -#ifndef MACHOWRITER_H -#define MACHOWRITER_H - -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/Target/TargetMachine.h" - -namespace llvm { - class GlobalVariable; - class Mangler; - class MCCodeEmitter; - class MCContext; - class MCStreamer; - - /// MachOWriter - This class implements the common target-independent code for - /// writing Mach-O files. Targets should derive a class from this to - /// parameterize the output format. - /// - class MachOWriter : public MachineFunctionPass { - static char ID; - - protected: - /// Output stream to send the resultant object file to. - /// - formatted_raw_ostream &O; - - /// Target machine description. - /// - TargetMachine &TM; - - /// Target Asm Printer information. - /// - const MCAsmInfo *MAI; - - /// MCCE - The MCCodeEmitter object that we are exposing to emit machine - /// code for functions to the .o file. - MCCodeEmitter *MCCE; - - /// OutContext - This is the context for the output file that we are - /// streaming. This owns all of the global MC-related objects for the - /// generated translation unit. - MCContext &OutContext; - - /// OutStreamer - This is the MCStreamer object for the file we are - /// generating. This contains the transient state for the current - /// translation unit that we are generating (such as the current section - /// etc). - MCStreamer &OutStreamer; - - /// Name-mangler for global names. - /// - Mangler *Mang; - - /// doInitialization - Emit the file header and all of the global variables - /// for the module to the Mach-O file. - bool doInitialization(Module &M); - - /// doFinalization - Now that the module has been completely processed, emit - /// the Mach-O file to 'O'. 
- bool doFinalization(Module &M); - - bool runOnMachineFunction(MachineFunction &MF); - - public: - explicit MachOWriter(formatted_raw_ostream &O, TargetMachine &TM, - const MCAsmInfo *T, MCCodeEmitter *MCE); - - virtual ~MachOWriter(); - - virtual const char *getPassName() const { - return "Mach-O Writer"; - } - }; -} - -#endif diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 9215bd5..655a0bf 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -14,15 +14,18 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/BasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetInstrDesc.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/ADT/SmallString.h" #include "llvm/Support/Debug.h" #include "llvm/Support/LeakDetector.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Assembly/Writer.h" #include <algorithm> using namespace llvm; @@ -36,6 +39,18 @@ MachineBasicBlock::~MachineBasicBlock() { LeakDetector::removeGarbageObject(this); } +/// getSymbol - Return the MCSymbol for this basic block. +/// +MCSymbol *MachineBasicBlock::getSymbol(MCContext &Ctx) const { + SmallString<60> Name; + const MachineFunction *MF = getParent(); + raw_svector_ostream(Name) + << MF->getTarget().getMCAsmInfo()->getPrivateGlobalPrefix() << "BB" + << MF->getFunctionNumber() << '_' << getNumber(); + return Ctx.GetOrCreateSymbol(Name.str()); +} + + raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineBasicBlock &MBB) { MBB.print(OS); return OS; @@ -525,7 +540,7 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA, } /// findDebugLoc - find the next valid DebugLoc starting at MBBI, skipping -/// any DEBUG_VALUE instructions. Return UnknownLoc if there is none. +/// any DBG_VALUE instructions. Return UnknownLoc if there is none. DebugLoc MachineBasicBlock::findDebugLoc(MachineBasicBlock::iterator &MBBI) { DebugLoc DL; @@ -533,8 +548,7 @@ MachineBasicBlock::findDebugLoc(MachineBasicBlock::iterator &MBBI) { if (MBBI != E) { // Skip debug declarations, we don't want a DebugLoc from them. 
MachineBasicBlock::iterator MBBI2 = MBBI; - while (MBBI2 != E && - MBBI2->getOpcode()==TargetInstrInfo::DEBUG_VALUE) + while (MBBI2 != E && MBBI2->isDebugValue()) MBBI2++; if (MBBI2 != E) DL = MBBI2->getDebugLoc(); diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 1e3e314..f141c56 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -16,7 +16,6 @@ #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/Instructions.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/Config/config.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" @@ -26,12 +25,16 @@ #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetFrameInfo.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -70,9 +73,9 @@ FunctionPass *llvm::createMachineFunctionPrinterPass(raw_ostream &OS, return new Printer(OS, Banner); } -//===---------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// // MachineFunction implementation -//===---------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// // Out of line virtual method. MachineFunctionInfo::~MachineFunctionInfo() {} @@ -81,8 +84,8 @@ void ilist_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) { MBB->getParent()->DeleteMachineBasicBlock(MBB); } -MachineFunction::MachineFunction(Function *F, - const TargetMachine &TM) +MachineFunction::MachineFunction(Function *F, const TargetMachine &TM, + unsigned FunctionNum) : Fn(F), Target(TM) { if (TM.getRegisterInfo()) RegInfo = new (Allocator.Allocate<MachineRegisterInfo>()) @@ -95,16 +98,8 @@ MachineFunction::MachineFunction(Function *F, ConstantPool = new (Allocator.Allocate<MachineConstantPool>()) MachineConstantPool(TM.getTargetData()); Alignment = TM.getTargetLowering()->getFunctionAlignment(F); - - // Set up jump table. - const TargetData &TD = *TM.getTargetData(); - bool IsPic = TM.getRelocationModel() == Reloc::PIC_; - unsigned EntrySize = IsPic ? 4 : TD.getPointerSize(); - unsigned TyAlignment = IsPic ? - TD.getABITypeAlignment(Type::getInt32Ty(F->getContext())) - : TD.getPointerABIAlignment(); - JumpTableInfo = new (Allocator.Allocate<MachineJumpTableInfo>()) - MachineJumpTableInfo(EntrySize, TyAlignment); + FunctionNumber = FunctionNum; + JumpTableInfo = 0; } MachineFunction::~MachineFunction() { @@ -121,9 +116,23 @@ } FrameInfo->~MachineFrameInfo(); Allocator.Deallocate(FrameInfo); ConstantPool->~MachineConstantPool(); Allocator.Deallocate(ConstantPool); - JumpTableInfo->~MachineJumpTableInfo(); Allocator.Deallocate(JumpTableInfo); + + if (JumpTableInfo) { + JumpTableInfo->~MachineJumpTableInfo(); + Allocator.Deallocate(JumpTableInfo); + } } +/// getOrCreateJumpTableInfo - Get the JumpTableInfo for this function, if it +/// does not already exist, allocate one.
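A hypothetical call site, to show the lazy-creation pattern the implementation below enables (the entry kind and destination list are illustrative only):

// Jump table info is now created on first use instead of in the constructor.
MachineJumpTableInfo *JTI =
  MF.getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_LabelDifference32);
std::vector<MachineBasicBlock*> Dests(4, DefaultMBB);  // placeholder targets
unsigned JTIdx = JTI->getJumpTableIndex(Dests);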
+MachineJumpTableInfo *MachineFunction:: +getOrCreateJumpTableInfo(unsigned EntryKind) { + if (JumpTableInfo) return JumpTableInfo; + + JumpTableInfo = new (Allocator.Allocate<MachineJumpTableInfo>()) + MachineJumpTableInfo((MachineJumpTableInfo::JTEntryKind)EntryKind); + return JumpTableInfo; +} /// RenumberBlocks - This discards all of the MachineBasicBlock numbers and /// recomputes them. This guarantees that the MBB numbers are sequential, @@ -178,7 +187,7 @@ MachineFunction::CreateMachineInstr(const TargetInstrDesc &TID, } /// CloneMachineInstr - Create a new MachineInstr which is a copy of the -/// 'Orig' instruction, identical in all ways except the the instruction +/// 'Orig' instruction, identical in all ways except the instruction /// has no parent, prev, or next. /// MachineInstr * @@ -311,7 +320,8 @@ void MachineFunction::print(raw_ostream &OS) const { FrameInfo->print(*this, OS); // Print JumpTable Information - JumpTableInfo->print(OS); + if (JumpTableInfo) + JumpTableInfo->print(OS); // Print Constant Pool ConstantPool->print(OS); @@ -435,6 +445,26 @@ DILocation MachineFunction::getDILocation(DebugLoc DL) const { return DILocation(DebugLocInfo.DebugLocations[Idx]); } + +/// getJTISymbol - Return the MCSymbol for the specified non-empty jump table. +/// If isLinkerPrivate is specified, an 'l' label is returned, otherwise a +/// normal 'L' label is returned. +MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, + bool isLinkerPrivate) const { + assert(JumpTableInfo && "No jump tables"); + + assert(JTI < JumpTableInfo->getJumpTables().size() && "Invalid JTI!"); + const MCAsmInfo &MAI = *getTarget().getMCAsmInfo(); + + const char *Prefix = isLinkerPrivate ? MAI.getLinkerPrivateGlobalPrefix() : + MAI.getPrivateGlobalPrefix(); + SmallString<60> Name; + raw_svector_ostream(Name) + << Prefix << "JTI" << getFunctionNumber() << '_' << JTI; + return Ctx.GetOrCreateSymbol(Name.str()); +} + + //===----------------------------------------------------------------------===// // MachineFrameInfo implementation //===----------------------------------------------------------------------===// @@ -528,6 +558,39 @@ void MachineFrameInfo::dump(const MachineFunction &MF) const { // MachineJumpTableInfo implementation //===----------------------------------------------------------------------===// +/// getEntrySize - Return the size of each entry in the jump table. +unsigned MachineJumpTableInfo::getEntrySize(const TargetData &TD) const { + // The size of a jump table entry is 4 bytes unless the entry is just the + // address of a block, in which case it is the pointer size. + switch (getEntryKind()) { + case MachineJumpTableInfo::EK_BlockAddress: + return TD.getPointerSize(); + case MachineJumpTableInfo::EK_GPRel32BlockAddress: + case MachineJumpTableInfo::EK_LabelDifference32: + case MachineJumpTableInfo::EK_Custom32: + return 4; + } + assert(0 && "Unknown jump table encoding!"); + return ~0; +} + +/// getEntryAlignment - Return the alignment of each entry in the jump table. +unsigned MachineJumpTableInfo::getEntryAlignment(const TargetData &TD) const { + // The alignment of a jump table entry is the alignment of int32 unless the + // entry is just the address of a block, in which case it is the pointer + // alignment. 
+ switch (getEntryKind()) { + case MachineJumpTableInfo::EK_BlockAddress: + return TD.getPointerABIAlignment(); + case MachineJumpTableInfo::EK_GPRel32BlockAddress: + case MachineJumpTableInfo::EK_LabelDifference32: + case MachineJumpTableInfo::EK_Custom32: + return TD.getABIIntegerTypeAlignment(32); + } + assert(0 && "Unknown jump table encoding!"); + return ~0; +} + /// getJumpTableIndex - Create a new jump table entry in the jump table info /// or return an existing one. /// @@ -538,11 +601,11 @@ unsigned MachineJumpTableInfo::getJumpTableIndex( return JumpTables.size()-1; } + /// ReplaceMBBInJumpTables - If Old is the target of any jump tables, update /// the jump tables to branch to New instead. -bool -MachineJumpTableInfo::ReplaceMBBInJumpTables(MachineBasicBlock *Old, - MachineBasicBlock *New) { +bool MachineJumpTableInfo::ReplaceMBBInJumpTables(MachineBasicBlock *Old, + MachineBasicBlock *New) { assert(Old != New && "Not making a change?"); bool MadeChange = false; for (size_t i = 0, e = JumpTables.size(); i != e; ++i) @@ -552,10 +615,9 @@ MachineJumpTableInfo::ReplaceMBBInJumpTables(MachineBasicBlock *Old, /// ReplaceMBBInJumpTable - If Old is a target of the jump tables, update /// the jump table to branch to New instead. -bool -MachineJumpTableInfo::ReplaceMBBInJumpTable(unsigned Idx, - MachineBasicBlock *Old, - MachineBasicBlock *New) { +bool MachineJumpTableInfo::ReplaceMBBInJumpTable(unsigned Idx, + MachineBasicBlock *Old, + MachineBasicBlock *New) { assert(Old != New && "Not making a change?"); bool MadeChange = false; MachineJumpTableEntry &JTE = JumpTables[Idx]; diff --git a/lib/CodeGen/MachineFunctionAnalysis.cpp b/lib/CodeGen/MachineFunctionAnalysis.cpp index f5febc5..8d87e3e 100644 --- a/lib/CodeGen/MachineFunctionAnalysis.cpp +++ b/lib/CodeGen/MachineFunctionAnalysis.cpp @@ -36,7 +36,7 @@ MachineFunctionAnalysis::~MachineFunctionAnalysis() { bool MachineFunctionAnalysis::runOnFunction(Function &F) { assert(!MF && "MachineFunctionAnalysis already initialized!"); - MF = new MachineFunction(&F, TM); + MF = new MachineFunction(&F, TM, NextFnNum++); return false; } diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index ef2fcee..b6d98e8 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -127,7 +127,8 @@ void MachineOperand::ChangeToImmediate(int64_t ImmVal) { /// the specified value. If an operand is known to be an register already, /// the setReg method should be used. void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, - bool isKill, bool isDead, bool isUndef) { + bool isKill, bool isDead, bool isUndef, + bool isDebug) { // If this operand is already a register operand, use setReg to update the // register's use/def lists. 
if (isReg()) { @@ -152,6 +153,7 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, IsDead = isDead; IsUndef = isUndef; IsEarlyClobber = false; + IsDebug = isDebug; SubReg = 0; } @@ -303,7 +305,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { MachineMemOperand::MachineMemOperand(const Value *v, unsigned int f, int64_t o, uint64_t s, unsigned int a) : Offset(o), Size(s), V(v), - Flags((f & 7) | ((Log2_32(a) + 1) << 3)) { + Flags((f & ((1 << MOMaxBits) - 1)) | ((Log2_32(a) + 1) << MOMaxBits)) { assert(getBaseAlignment() == a && "Alignment is not a power of 2!"); assert((isLoad() || isStore()) && "Not a load/store!"); } @@ -325,7 +327,8 @@ void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) { if (MMO->getBaseAlignment() >= getBaseAlignment()) { // Update the alignment value. - Flags = (Flags & 7) | ((Log2_32(MMO->getBaseAlignment()) + 1) << 3); + Flags = (Flags & ((1 << MOMaxBits) - 1)) | + ((Log2_32(MMO->getBaseAlignment()) + 1) << MOMaxBits); // Also update the base and offset, because the new alignment may // not be applicable with the old ones. V = MMO->getValue(); @@ -740,20 +743,6 @@ unsigned MachineInstr::getNumExplicitOperands() const { } -/// isLabel - Returns true if the MachineInstr represents a label. -/// -bool MachineInstr::isLabel() const { - return getOpcode() == TargetInstrInfo::DBG_LABEL || - getOpcode() == TargetInstrInfo::EH_LABEL || - getOpcode() == TargetInstrInfo::GC_LABEL; -} - -/// isDebugLabel - Returns true if the MachineInstr represents a debug label. -/// -bool MachineInstr::isDebugLabel() const { - return getOpcode() == TargetInstrInfo::DBG_LABEL; -} - /// findRegisterUseOperandIdx() - Returns the MachineOperand that is a use of /// the specific register or -1 if it is not found. It further tightens /// the search criteria to a use that kills the register if isKill is true. @@ -819,7 +808,7 @@ int MachineInstr::findFirstPredOperandIdx() const { /// first tied use operand index by reference if UseOpIdx is not null. bool MachineInstr:: isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const { - if (getOpcode() == TargetInstrInfo::INLINEASM) { + if (isInlineAsm()) { assert(DefOpIdx >= 2); const MachineOperand &MO = getOperand(DefOpIdx); if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0) @@ -878,7 +867,7 @@ isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const { /// operand index by reference. bool MachineInstr:: isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const { - if (getOpcode() == TargetInstrInfo::INLINEASM) { + if (isInlineAsm()) { const MachineOperand &MO = getOperand(UseOpIdx); if (!MO.isReg() || !MO.isUse() || MO.getReg() == 0) return false; @@ -1046,7 +1035,7 @@ bool MachineInstr::hasVolatileMemoryRef() const { /// isInvariantLoad - Return true if this instruction is loading from a /// location whose value is invariant across the function. For example, -/// loading a value from the constant pool or from from the argument area +/// loading a value from the constant pool or from the argument area /// of a function if it does not change. This should only return true if /// *all* loads the instruction does are invariant (if it does multiple loads). bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const { @@ -1088,7 +1077,7 @@ bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const { /// merges together the same virtual register, return the register, otherwise /// return 0.
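For instance, %v = PHI %a, <bb0>, %a, <bb1> merges a single register, so the function returns %a. A hypothetical caller that folds such PHIs away:

static bool foldConstantValuePHI(MachineInstr *PHI, MachineRegisterInfo *MRI) {
  if (unsigned Reg = PHI->isConstantValuePHI()) {
    // Every incoming value is the same register: forward it and drop the PHI.
    MRI->replaceRegWith(PHI->getOperand(0).getReg(), Reg);
    PHI->eraseFromParent();
    return true;
  }
  return false;
}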
unsigned MachineInstr::isConstantValuePHI() const { - if (getOpcode() != TargetInstrInfo::PHI) + if (!isPHI()) return 0; assert(getNumOperands() >= 3 && "It's illegal to have a PHI without source operands"); diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index ffcc8ab..92c84f3 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -336,7 +336,7 @@ static bool HasPHIUses(unsigned Reg, MachineRegisterInfo *RegInfo) { for (MachineRegisterInfo::use_iterator UI = RegInfo->use_begin(Reg), UE = RegInfo->use_end(); UI != UE; ++UI) { MachineInstr *UseMI = &*UI; - if (UseMI->getOpcode() == TargetInstrInfo::PHI) + if (UseMI->isPHI()) return true; } return false; @@ -363,7 +363,7 @@ bool MachineLICM::isLoadFromConstantMemory(MachineInstr *MI) { /// IsProfitableToHoist - Return true if it is potentially profitable to hoist /// the given loop invariant. bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { - if (MI.getOpcode() == TargetInstrInfo::IMPLICIT_DEF) + if (MI.isImplicitDef()) return false; // FIXME: For now, only hoist re-materializable instructions. LICM will diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index ed5bb5e..5052af7 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -40,6 +40,7 @@ MachineModuleInfoImpl::~MachineModuleInfoImpl() {} MachineModuleInfo::MachineModuleInfo() : ImmutablePass(&ID) , ObjFileMMI(0) +, CurCallSite(0) , CallsEHReturn(0) , CallsUnwindInit(0) , DbgInfoAvailable(false) { @@ -71,6 +72,7 @@ void MachineModuleInfo::EndFunction() { // Clean up exception info. LandingPads.clear(); + CallSiteMap.clear(); TypeInfos.clear(); FilterIds.clear(); FilterEnds.clear(); diff --git a/lib/CodeGen/MachineModuleInfoImpls.cpp b/lib/CodeGen/MachineModuleInfoImpls.cpp index 7a62929..39d2c75 100644 --- a/lib/CodeGen/MachineModuleInfoImpls.cpp +++ b/lib/CodeGen/MachineModuleInfoImpls.cpp @@ -22,22 +22,23 @@ using namespace llvm; // Out of line virtual method. void MachineModuleInfoMachO::Anchor() {} - +void MachineModuleInfoELF::Anchor() {} static int SortSymbolPair(const void *LHS, const void *RHS) { const MCSymbol *LHSS = - ((const std::pair<const MCSymbol*, const MCSymbol*>*)LHS)->first; + ((const std::pair<MCSymbol*, MCSymbol*>*)LHS)->first; const MCSymbol *RHSS = - ((const std::pair<const MCSymbol*, const MCSymbol*>*)RHS)->first; + ((const std::pair<MCSymbol*, MCSymbol*>*)RHS)->first; return LHSS->getName().compare(RHSS->getName()); } /// GetSortedStubs - Return the entries from a DenseMap in a deterministic /// sorted order.
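DenseMap iteration order depends on hashing and insertion history, so emitting stubs straight out of the map would make the assembler output nondeterministic; sorting the flattened list by symbol name restores stable output. The same comparison written for std::sort rather than the qsort call above (an illustrative alternative, not the patch's code):

typedef std::pair<MCSymbol*, MCSymbol*> StubPair;
static bool stubNameLess(const StubPair &LHS, const StubPair &RHS) {
  // Order purely by symbol name so repeated runs emit identical stub lists.
  return LHS.first->getName() < RHS.first->getName();
}
// usage: std::sort(List.begin(), List.end(), stubNameLess);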
-MachineModuleInfoMachO::SymbolListTy -MachineModuleInfoMachO::GetSortedStubs(const DenseMap<const MCSymbol*, - const MCSymbol*> &Map) { - MachineModuleInfoMachO::SymbolListTy List(Map.begin(), Map.end()); +MachineModuleInfoImpl::SymbolListTy +MachineModuleInfoImpl::GetSortedStubs(const DenseMap<MCSymbol*, + MCSymbol*> &Map) { + MachineModuleInfoImpl::SymbolListTy List(Map.begin(), Map.end()); + if (!List.empty()) qsort(&List[0], List.size(), sizeof(List[0]), SortSymbolPair); return List; diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp index 467ea5d..2255dc3 100644 --- a/lib/CodeGen/MachineSSAUpdater.cpp +++ b/lib/CodeGen/MachineSSAUpdater.cpp @@ -20,6 +20,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -92,13 +93,13 @@ unsigned LookForIdenticalPHI(MachineBasicBlock *BB, return 0; MachineBasicBlock::iterator I = BB->front(); - if (I->getOpcode() != TargetInstrInfo::PHI) + if (!I->isPHI()) return 0; AvailableValsTy AVals; for (unsigned i = 0, e = PredValues.size(); i != e; ++i) AVals[PredValues[i].first] = PredValues[i].second; - while (I != BB->end() && I->getOpcode() == TargetInstrInfo::PHI) { + while (I != BB->end() && I->isPHI()) { bool Same = true; for (unsigned i = 1, e = I->getNumOperands(); i != e; i += 2) { unsigned SrcReg = I->getOperand(i).getReg(); @@ -155,7 +156,7 @@ unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) { // If there are no predecessors, just return undef. if (BB->pred_empty()) { // Insert an implicit_def to represent an undef value. - MachineInstr *NewDef = InsertNewDef(TargetInstrInfo::IMPLICIT_DEF, + MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF, BB, BB->getFirstTerminator(), VRC, MRI, TII); return NewDef->getOperand(0).getReg(); @@ -192,7 +193,7 @@ unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) { // Otherwise, we do need a PHI: insert one now. MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front(); - MachineInstr *InsertedPHI = InsertNewDef(TargetInstrInfo::PHI, BB, + MachineInstr *InsertedPHI = InsertNewDef(TargetOpcode::PHI, BB, Loc, VRC, MRI, TII); // Fill in all the predecessors of the PHI. @@ -231,7 +232,7 @@ MachineBasicBlock *findCorrespondingPred(const MachineInstr *MI, void MachineSSAUpdater::RewriteUse(MachineOperand &U) { MachineInstr *UseMI = U.getParent(); unsigned NewVR = 0; - if (UseMI->getOpcode() == TargetInstrInfo::PHI) { + if (UseMI->isPHI()) { MachineBasicBlock *SourceBB = findCorrespondingPred(UseMI, &U); NewVR = GetValueAtEndOfBlockInternal(SourceBB); } else { @@ -277,7 +278,7 @@ unsigned MachineSSAUpdater::GetValueAtEndOfBlockInternal(MachineBasicBlock *BB){ // it. When we get back to the first instance of the recursion we will fill // in the PHI node. MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front(); - MachineInstr *NewPHI = InsertNewDef(TargetInstrInfo::PHI, BB, Loc, + MachineInstr *NewPHI = InsertNewDef(TargetOpcode::PHI, BB, Loc, VRC, MRI,TII); unsigned NewVR = NewPHI->getOperand(0).getReg(); InsertRes.first->second = NewVR; @@ -289,7 +290,7 @@ unsigned MachineSSAUpdater::GetValueAtEndOfBlockInternal(MachineBasicBlock *BB){ // be invalidated. if (BB->pred_empty()) { // Insert an implicit_def to represent an undef value. 
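InsertNewDef is a static helper earlier in this file that the hunks do not show; paraphrased, it allocates a fresh virtual register and builds the requested definition at the given point:

// Paraphrase of the helper's shape, not the file's exact code.
static MachineInstr *InsertNewDef(unsigned Opcode, MachineBasicBlock *BB,
                                  MachineBasicBlock::iterator I,
                                  const TargetRegisterClass *RC,
                                  MachineRegisterInfo *MRI,
                                  const TargetInstrInfo *TII) {
  unsigned NewVR = MRI->createVirtualRegister(RC);
  return BuildMI(*BB, I, DebugLoc(), TII->get(Opcode), NewVR);
}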
- MachineInstr *NewDef = InsertNewDef(TargetInstrInfo::IMPLICIT_DEF, + MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF, BB, BB->getFirstTerminator(), VRC, MRI, TII); return InsertRes.first->second = NewDef->getOperand(0).getReg(); @@ -358,7 +359,7 @@ unsigned MachineSSAUpdater::GetValueAtEndOfBlockInternal(MachineBasicBlock *BB){ MachineInstr *InsertedPHI; if (InsertedVal == 0) { MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front(); - InsertedPHI = InsertNewDef(TargetInstrInfo::PHI, BB, Loc, + InsertedPHI = InsertNewDef(TargetOpcode::PHI, BB, Loc, VRC, MRI, TII); InsertedVal = InsertedPHI->getOperand(0).getReg(); } else { diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index c177e3c..c391576 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -77,7 +77,7 @@ bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg, // Determine the block of the use. MachineInstr *UseInst = &*I; MachineBasicBlock *UseBlock = UseInst->getParent(); - if (UseInst->getOpcode() == TargetInstrInfo::PHI) { + if (UseInst->isPHI()) { // PHI nodes use the operand in the predecessor block, not the block with // the PHI. UseBlock = UseInst->getOperand(I.getOperandNo()+1).getMBB(); @@ -269,8 +269,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // Determine where to insert into. Skip phi nodes. MachineBasicBlock::iterator InsertPos = SuccToSinkTo->begin(); - while (InsertPos != SuccToSinkTo->end() && - InsertPos->getOpcode() == TargetInstrInfo::PHI) + while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI()) ++InsertPos; // Move the instruction. diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 584c21b..434a1e8 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -590,7 +590,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { // must be live in. PHI instructions are handled separately. if (MInfo.regsKilled.count(Reg)) report("Using a killed virtual register", MO, MONum); - else if (MI->getOpcode() != TargetInstrInfo::PHI) + else if (!MI->isPHI()) MInfo.vregsLiveIn.insert(std::make_pair(Reg, MI)); } } @@ -650,10 +650,8 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } case MachineOperand::MO_MachineBasicBlock: - if (MI->getOpcode() == TargetInstrInfo::PHI) { - if (!MO->getMBB()->isSuccessor(MI->getParent())) - report("PHI operand is not in the CFG", MO, MONum); - } + if (MI->isPHI() && !MO->getMBB()->isSuccessor(MI->getParent())) + report("PHI operand is not in the CFG", MO, MONum); break; default: @@ -783,7 +781,7 @@ void MachineVerifier::calcRegsRequired() { // calcRegsPassed has been run so BBInfo::isLiveOut is valid. void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) { for (MachineBasicBlock::const_iterator BBI = MBB->begin(), BBE = MBB->end(); - BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI) { + BBI != BBE && BBI->isPHI(); ++BBI) { DenseSet<const MachineBasicBlock*> seen; for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) { diff --git a/lib/CodeGen/Makefile b/lib/CodeGen/Makefile index 8c0204c..4ab3e3c 100644 --- a/lib/CodeGen/Makefile +++ b/lib/CodeGen/Makefile @@ -11,7 +11,6 @@ LEVEL = ../.. 
LIBRARYNAME = LLVMCodeGen PARALLEL_DIRS = SelectionDAG AsmPrinter BUILD_ARCHIVE = 1 -CXXFLAGS = -fno-rtti include $(LEVEL)/Makefile.common diff --git a/lib/CodeGen/OptimizeExts.cpp b/lib/CodeGen/OptimizeExts.cpp index 096f9d4..acb6869 100644 --- a/lib/CodeGen/OptimizeExts.cpp +++ b/lib/CodeGen/OptimizeExts.cpp @@ -110,7 +110,7 @@ bool OptimizeExts::OptimizeInstr(MachineInstr *MI, MachineBasicBlock *MBB, MachineInstr *UseMI = &*UI; if (UseMI == MI) continue; - if (UseMI->getOpcode() == TargetInstrInfo::PHI) { + if (UseMI->isPHI()) { ExtendLife = false; continue; } @@ -150,7 +150,7 @@ bool OptimizeExts::OptimizeInstr(MachineInstr *MI, MachineBasicBlock *MBB, UI = MRI->use_begin(DstReg); for (MachineRegisterInfo::use_iterator UE = MRI->use_end(); UI != UE; ++UI) - if (UI->getOpcode() == TargetInstrInfo::PHI) + if (UI->isPHI()) PHIBBs.insert(UI->getParent()); const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); @@ -162,7 +162,7 @@ bool OptimizeExts::OptimizeInstr(MachineInstr *MI, MachineBasicBlock *MBB, continue; unsigned NewVR = MRI->createVirtualRegister(RC); BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(), - TII->get(TargetInstrInfo::EXTRACT_SUBREG), NewVR) + TII->get(TargetOpcode::EXTRACT_SUBREG), NewVR) .addReg(DstReg).addImm(SubIdx); UseMO->setReg(NewVR); ++NumReuse; diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp new file mode 100644 index 0000000..2717d4d --- /dev/null +++ b/lib/CodeGen/OptimizePHIs.cpp @@ -0,0 +1,189 @@ +//===-- OptimizePHIs.cpp - Optimize machine instruction PHIs --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass optimizes machine instruction PHIs to take advantage of +// opportunities created during DAG legalization. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "phi-opt" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Function.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumPHICycles, "Number of PHI cycles replaced"); +STATISTIC(NumDeadPHICycles, "Number of dead PHI cycles"); + +namespace { + class OptimizePHIs : public MachineFunctionPass { + MachineRegisterInfo *MRI; + const TargetInstrInfo *TII; + + public: + static char ID; // Pass identification + OptimizePHIs() : MachineFunctionPass(&ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + private: + typedef SmallPtrSet<MachineInstr*, 16> InstrSet; + typedef SmallPtrSetIterator<MachineInstr*> InstrSetIterator; + + bool IsSingleValuePHICycle(MachineInstr *MI, unsigned &SingleValReg, + InstrSet &PHIsInCycle); + bool IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle); + bool OptimizeBB(MachineBasicBlock &MBB); + }; +} + +char OptimizePHIs::ID = 0; +static RegisterPass<OptimizePHIs> +X("opt-phis", "Optimize machine instruction PHIs"); + +FunctionPass *llvm::createOptimizePHIsPass() { return new OptimizePHIs(); } + +bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) { + MRI = &Fn.getRegInfo(); + TII = Fn.getTarget().getInstrInfo(); + + // Find dead PHI cycles and PHI cycles that can be replaced by a single + // value. InstCombine does these optimizations, but DAG legalization may + // introduce new opportunities, e.g., when i64 values are split up for + // 32-bit targets. + bool Changed = false; + for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) + Changed |= OptimizeBB(*I); + + return Changed; +} + +/// IsSingleValuePHICycle - Check if MI is a PHI where all the source operands +/// are copies of SingleValReg, possibly via copies through other PHIs. If +/// SingleValReg is zero on entry, it is set to the register with the single +/// non-copy value. PHIsInCycle is a set used to keep track of the PHIs that +/// have been scanned. +bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI, + unsigned &SingleValReg, + InstrSet &PHIsInCycle) { + assert(MI->isPHI() && "IsSingleValuePHICycle expects a PHI instruction"); + unsigned DstReg = MI->getOperand(0).getReg(); + + // See if we already saw this register. + if (!PHIsInCycle.insert(MI)) + return true; + + // Don't scan crazily complex things. + if (PHIsInCycle.size() == 16) + return false; + + // Scan the PHI operands. + for (unsigned i = 1; i != MI->getNumOperands(); i += 2) { + unsigned SrcReg = MI->getOperand(i).getReg(); + if (SrcReg == DstReg) + continue; + MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); + + // Skip over register-to-register moves. + unsigned MvSrcReg, MvDstReg, SrcSubIdx, DstSubIdx; + if (SrcMI && + TII->isMoveInstr(*SrcMI, MvSrcReg, MvDstReg, SrcSubIdx, DstSubIdx) && + SrcSubIdx == 0 && DstSubIdx == 0 && + TargetRegisterInfo::isVirtualRegister(MvSrcReg)) + SrcMI = MRI->getVRegDef(MvSrcReg); + if (!SrcMI) + return false; + + if (SrcMI->isPHI()) { + if (!IsSingleValuePHICycle(SrcMI, SingleValReg, PHIsInCycle)) + return false; + } else { + // Fail if there is more than one non-phi/non-move register. 
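To make the cycle check concrete: the i64 split on a 32-bit target mentioned above typically leaves a shape like the following, where both PHIs only ever see one real value (register names illustrative):

//   %v1 = PHI %real, <bb0>, %v2, <bb1>
//   %v2 = PHI %v1, <bb2>, %v1, <bb3>
// IsSingleValuePHICycle on %v1's PHI recurses through %v2's PHI, skips the
// self-references, and returns true with SingleValReg = %real, so %v1 can be
// replaced by %real outright.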
+ if (SingleValReg != 0) + return false; + SingleValReg = SrcReg; + } + } + return true; +} + +/// IsDeadPHICycle - Check if the register defined by a PHI is only used by +/// other PHIs in a cycle. +bool OptimizePHIs::IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle) { + assert(MI->isPHI() && "IsDeadPHICycle expects a PHI instruction"); + unsigned DstReg = MI->getOperand(0).getReg(); + assert(TargetRegisterInfo::isVirtualRegister(DstReg) && + "PHI destination is not a virtual register"); + + // See if we already saw this register. + if (!PHIsInCycle.insert(MI)) + return true; + + // Don't scan crazily complex things. + if (PHIsInCycle.size() == 16) + return false; + + for (MachineRegisterInfo::use_iterator I = MRI->use_begin(DstReg), + E = MRI->use_end(); I != E; ++I) { + MachineInstr *UseMI = &*I; + if (!UseMI->isPHI() || !IsDeadPHICycle(UseMI, PHIsInCycle)) + return false; + } + + return true; +} + +/// OptimizeBB - Remove dead PHI cycles and PHI cycles that can be replaced by +/// a single value. +bool OptimizePHIs::OptimizeBB(MachineBasicBlock &MBB) { + bool Changed = false; + for (MachineBasicBlock::iterator + MII = MBB.begin(), E = MBB.end(); MII != E; ) { + MachineInstr *MI = &*MII++; + if (!MI->isPHI()) + break; + + // Check for single-value PHI cycles. + unsigned SingleValReg = 0; + InstrSet PHIsInCycle; + if (IsSingleValuePHICycle(MI, SingleValReg, PHIsInCycle) && + SingleValReg != 0) { + MRI->replaceRegWith(MI->getOperand(0).getReg(), SingleValReg); + MI->eraseFromParent(); + ++NumPHICycles; + Changed = true; + continue; + } + + // Check for dead PHI cycles. + PHIsInCycle.clear(); + if (IsDeadPHICycle(MI, PHIsInCycle)) { + for (InstrSetIterator PI = PHIsInCycle.begin(), PE = PHIsInCycle.end(); + PI != PE; ++PI) { + MachineInstr *PhiMI = *PI; + if (&*MII == PhiMI) + ++MII; + PhiMI->eraseFromParent(); + } + ++NumDeadPHICycles; + Changed = true; + } + } + return Changed; +} diff --git a/lib/CodeGen/PBQP/AnnotatedGraph.h b/lib/CodeGen/PBQP/AnnotatedGraph.h deleted file mode 100644 index 738dea0..0000000 --- a/lib/CodeGen/PBQP/AnnotatedGraph.h +++ /dev/null @@ -1,184 +0,0 @@ -//===-- AnnotatedGraph.h - Annotated PBQP Graph -----------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Annotated PBQP Graph class. This class is used internally by the PBQP solver -// to cache information to speed up reduction. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_PBQP_ANNOTATEDGRAPH_H -#define LLVM_CODEGEN_PBQP_ANNOTATEDGRAPH_H - -#include "GraphBase.h" - -namespace PBQP { - - -template <typename NodeData, typename EdgeData> class AnnotatedEdge; - -template <typename NodeData, typename EdgeData> -class AnnotatedNode : public NodeBase<AnnotatedNode<NodeData, EdgeData>, - AnnotatedEdge<NodeData, EdgeData> > { -private: - - NodeData nodeData; - -public: - - AnnotatedNode(const Vector &costs, const NodeData &nodeData) : - NodeBase<AnnotatedNode<NodeData, EdgeData>, - AnnotatedEdge<NodeData, EdgeData> >(costs), - nodeData(nodeData) {} - - NodeData& getNodeData() { return nodeData; } - const NodeData& getNodeData() const { return nodeData; } - -}; - -template <typename NodeData, typename EdgeData> -class AnnotatedEdge : public EdgeBase<AnnotatedNode<NodeData, EdgeData>, - AnnotatedEdge<NodeData, EdgeData> > { -private: - - typedef typename GraphBase<AnnotatedNode<NodeData, EdgeData>, - AnnotatedEdge<NodeData, EdgeData> >::NodeIterator - NodeIterator; - - EdgeData edgeData; - -public: - - - AnnotatedEdge(const NodeIterator &node1Itr, const NodeIterator &node2Itr, - const Matrix &costs, const EdgeData &edgeData) : - EdgeBase<AnnotatedNode<NodeData, EdgeData>, - AnnotatedEdge<NodeData, EdgeData> >(node1Itr, node2Itr, costs), - edgeData(edgeData) {} - - EdgeData& getEdgeData() { return edgeData; } - const EdgeData& getEdgeData() const { return edgeData; } - -}; - -template <typename NodeData, typename EdgeData> -class AnnotatedGraph : public GraphBase<AnnotatedNode<NodeData, EdgeData>, - AnnotatedEdge<NodeData, EdgeData> > { -private: - - typedef GraphBase<AnnotatedNode<NodeData, EdgeData>, - AnnotatedEdge<NodeData, EdgeData> > PGraph; - - typedef AnnotatedNode<NodeData, EdgeData> NodeEntry; - typedef AnnotatedEdge<NodeData, EdgeData> EdgeEntry; - - - void copyFrom(const AnnotatedGraph &other) { - if (!other.areNodeIDsValid()) { - other.assignNodeIDs(); - } - std::vector<NodeIterator> newNodeItrs(other.getNumNodes()); - - for (ConstNodeIterator nItr = other.nodesBegin(), nEnd = other.nodesEnd(); - nItr != nEnd; ++nItr) { - newNodeItrs[other.getNodeID(nItr)] = addNode(other.getNodeCosts(nItr)); - } - - for (ConstEdgeIterator eItr = other.edgesBegin(), eEnd = other.edgesEnd(); - eItr != eEnd; ++eItr) { - - unsigned node1ID = other.getNodeID(other.getEdgeNode1(eItr)), - node2ID = other.getNodeID(other.getEdgeNode2(eItr)); - - addEdge(newNodeItrs[node1ID], newNodeItrs[node2ID], - other.getEdgeCosts(eItr), other.getEdgeData(eItr)); - } - - } - -public: - - typedef typename PGraph::NodeIterator NodeIterator; - typedef typename PGraph::ConstNodeIterator ConstNodeIterator; - typedef typename PGraph::EdgeIterator EdgeIterator; - typedef typename PGraph::ConstEdgeIterator ConstEdgeIterator; - - AnnotatedGraph() {} - - AnnotatedGraph(const AnnotatedGraph &other) { - copyFrom(other); - } - - AnnotatedGraph& operator=(const AnnotatedGraph &other) { - PGraph::clear(); - copyFrom(other); - return *this; - } - - NodeIterator addNode(const Vector &costs, const NodeData &data) { - return PGraph::addConstructedNode(NodeEntry(costs, data)); - } - - EdgeIterator addEdge(const NodeIterator &node1Itr, - const NodeIterator &node2Itr, - const Matrix &costs, const EdgeData &data) { - return PGraph::addConstructedEdge(EdgeEntry(node1Itr, node2Itr, - costs, data)); - } - - NodeData& getNodeData(const NodeIterator &nodeItr) { - return PGraph::getNodeEntry(nodeItr).getNodeData(); 
- } - - const NodeData& getNodeData(const NodeIterator &nodeItr) const { - return PGraph::getNodeEntry(nodeItr).getNodeData(); - } - - EdgeData& getEdgeData(const EdgeIterator &edgeItr) { - return PGraph::getEdgeEntry(edgeItr).getEdgeData(); - } - - const EdgeEntry& getEdgeData(const EdgeIterator &edgeItr) const { - return PGraph::getEdgeEntry(edgeItr).getEdgeData(); - } - - SimpleGraph toSimpleGraph() const { - SimpleGraph g; - - if (!PGraph::areNodeIDsValid()) { - PGraph::assignNodeIDs(); - } - std::vector<SimpleGraph::NodeIterator> newNodeItrs(PGraph::getNumNodes()); - - for (ConstNodeIterator nItr = PGraph::nodesBegin(), - nEnd = PGraph::nodesEnd(); - nItr != nEnd; ++nItr) { - - newNodeItrs[getNodeID(nItr)] = g.addNode(getNodeCosts(nItr)); - } - - for (ConstEdgeIterator - eItr = PGraph::edgesBegin(), eEnd = PGraph::edgesEnd(); - eItr != eEnd; ++eItr) { - - unsigned node1ID = getNodeID(getEdgeNode1(eItr)), - node2ID = getNodeID(getEdgeNode2(eItr)); - - g.addEdge(newNodeItrs[node1ID], newNodeItrs[node2ID], - getEdgeCosts(eItr)); - } - - return g; - } - -}; - - -} - -#endif // LLVM_CODEGEN_PBQP_ANNOTATEDGRAPH_H diff --git a/lib/CodeGen/PBQP/ExhaustiveSolver.h b/lib/CodeGen/PBQP/ExhaustiveSolver.h deleted file mode 100644 index 35ec4f1..0000000 --- a/lib/CodeGen/PBQP/ExhaustiveSolver.h +++ /dev/null @@ -1,110 +0,0 @@ -//===-- ExhaustiveSolver.h - Brute Force PBQP Solver ------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Uses a trivial brute force algorithm to solve a PBQP problem. -// PBQP is NP-HARD - This solver should only be used for debugging small -// problems. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_PBQP_EXHAUSTIVESOLVER_H -#define LLVM_CODEGEN_PBQP_EXHAUSTIVESOLVER_H - -#include "Solver.h" - -namespace PBQP { - -/// A brute force PBQP solver. This solver takes exponential time. It should -/// only be used for debugging purposes. -class ExhaustiveSolverImpl { -private: - - const SimpleGraph &g; - - PBQPNum getSolutionCost(const Solution &solution) const { - PBQPNum cost = 0.0; - - for (SimpleGraph::ConstNodeIterator - nodeItr = g.nodesBegin(), nodeEnd = g.nodesEnd(); - nodeItr != nodeEnd; ++nodeItr) { - - unsigned nodeId = g.getNodeID(nodeItr); - - cost += g.getNodeCosts(nodeItr)[solution.getSelection(nodeId)]; - } - - for (SimpleGraph::ConstEdgeIterator - edgeItr = g.edgesBegin(), edgeEnd = g.edgesEnd(); - edgeItr != edgeEnd; ++edgeItr) { - - SimpleGraph::ConstNodeIterator n1 = g.getEdgeNode1Itr(edgeItr), - n2 = g.getEdgeNode2Itr(edgeItr); - unsigned sol1 = solution.getSelection(g.getNodeID(n1)), - sol2 = solution.getSelection(g.getNodeID(n2)); - - cost += g.getEdgeCosts(edgeItr)[sol1][sol2]; - } - - return cost; - } - -public: - - ExhaustiveSolverImpl(const SimpleGraph &g) : g(g) {} - - Solution solve() const { - Solution current(g.getNumNodes(), true), optimal(current); - - PBQPNum bestCost = std::numeric_limits<PBQPNum>::infinity(); - bool finished = false; - - while (!finished) { - PBQPNum currentCost = getSolutionCost(current); - - if (currentCost < bestCost) { - optimal = current; - bestCost = currentCost; - } - - // assume we're done. 
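The loop below is a mixed-radix odometer: increment the lowest digit that still has room and reset every digit that overflows; once all digits wrap, every assignment has been visited. A standalone restatement of that stepping logic (not the deleted file's code):

static bool nextAssignment(std::vector<unsigned> &Sel,
                           const std::vector<unsigned> &Radix) {
  for (unsigned i = 0, e = Sel.size(); i != e; ++i) {
    if (++Sel[i] < Radix[i])
      return true;   // this digit had room; a new assignment is ready
    Sel[i] = 0;      // overflow: reset and carry into the next digit
  }
  return false;      // all digits wrapped: enumeration is complete
}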
- finished = true; - - for (unsigned i = 0; i < g.getNumNodes(); ++i) { - if (current.getSelection(i) == - (g.getNodeCosts(g.getNodeItr(i)).getLength() - 1)) { - current.setSelection(i, 0); - } - else { - current.setSelection(i, current.getSelection(i) + 1); - finished = false; - break; - } - } - - } - - optimal.setSolutionCost(bestCost); - - return optimal; - } - -}; - -class ExhaustiveSolver : public Solver { -public: - ~ExhaustiveSolver() {} - Solution solve(const SimpleGraph &g) const { - ExhaustiveSolverImpl solver(g); - return solver.solve(); - } -}; - -} - -#endif // LLVM_CODGEN_PBQP_EXHAUSTIVESOLVER_HPP diff --git a/lib/CodeGen/PBQP/Graph.h b/lib/CodeGen/PBQP/Graph.h new file mode 100644 index 0000000..b2224cb --- /dev/null +++ b/lib/CodeGen/PBQP/Graph.h @@ -0,0 +1,425 @@ +//===-------------------- Graph.h - PBQP Graph ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// PBQP Graph class. +// +//===----------------------------------------------------------------------===// + + +#ifndef LLVM_CODEGEN_PBQP_GRAPH_H +#define LLVM_CODEGEN_PBQP_GRAPH_H + +#include "Math.h" + +#include <list> +#include <vector> +#include <map> + +namespace PBQP { + + /// PBQP Graph class. + /// Instances of this class describe PBQP problems. + class Graph { + private: + + // ----- TYPEDEFS ----- + class NodeEntry; + class EdgeEntry; + + typedef std::list<NodeEntry> NodeList; + typedef std::list<EdgeEntry> EdgeList; + + public: + + typedef NodeList::iterator NodeItr; + typedef NodeList::const_iterator ConstNodeItr; + + typedef EdgeList::iterator EdgeItr; + typedef EdgeList::const_iterator ConstEdgeItr; + + private: + + typedef std::list<EdgeItr> AdjEdgeList; + + public: + + typedef AdjEdgeList::iterator AdjEdgeItr; + + private: + + class NodeEntry { + private: + Vector costs; + AdjEdgeList adjEdges; + unsigned degree; + void *data; + public: + NodeEntry(const Vector &costs) : costs(costs), degree(0) {} + Vector& getCosts() { return costs; } + const Vector& getCosts() const { return costs; } + unsigned getDegree() const { return degree; } + AdjEdgeItr edgesBegin() { return adjEdges.begin(); } + AdjEdgeItr edgesEnd() { return adjEdges.end(); } + AdjEdgeItr addEdge(EdgeItr e) { + ++degree; + return adjEdges.insert(adjEdges.end(), e); + } + void removeEdge(AdjEdgeItr ae) { + --degree; + adjEdges.erase(ae); + } + void setData(void *data) { this->data = data; } + void* getData() { return data; } + }; + + class EdgeEntry { + private: + NodeItr node1, node2; + Matrix costs; + AdjEdgeItr node1AEItr, node2AEItr; + void *data; + public: + EdgeEntry(NodeItr node1, NodeItr node2, const Matrix &costs) + : node1(node1), node2(node2), costs(costs) {} + NodeItr getNode1() const { return node1; } + NodeItr getNode2() const { return node2; } + Matrix& getCosts() { return costs; } + const Matrix& getCosts() const { return costs; } + void setNode1AEItr(AdjEdgeItr ae) { node1AEItr = ae; } + AdjEdgeItr getNode1AEItr() { return node1AEItr; } + void setNode2AEItr(AdjEdgeItr ae) { node2AEItr = ae; } + AdjEdgeItr getNode2AEItr() { return node2AEItr; } + void setData(void *data) { this->data = data; } + void *getData() { return data; } + }; + + // ----- MEMBERS ----- + + NodeList nodes; + unsigned numNodes; + + EdgeList edges; + unsigned numEdges; + + // ----- INTERNAL METHODS ----- + + NodeEntry& 
getNode(NodeItr nItr) { return *nItr; } + const NodeEntry& getNode(ConstNodeItr nItr) const { return *nItr; } + + EdgeEntry& getEdge(EdgeItr eItr) { return *eItr; } + const EdgeEntry& getEdge(ConstEdgeItr eItr) const { return *eItr; } + + NodeItr addConstructedNode(const NodeEntry &n) { + ++numNodes; + return nodes.insert(nodes.end(), n); + } + + EdgeItr addConstructedEdge(const EdgeEntry &e) { + assert(findEdge(e.getNode1(), e.getNode2()) == edges.end() && + "Attempt to add duplicate edge."); + ++numEdges; + EdgeItr edgeItr = edges.insert(edges.end(), e); + EdgeEntry &ne = getEdge(edgeItr); + NodeEntry &n1 = getNode(ne.getNode1()); + NodeEntry &n2 = getNode(ne.getNode2()); + // Sanity check on matrix dimensions: + assert((n1.getCosts().getLength() == ne.getCosts().getRows()) && + (n2.getCosts().getLength() == ne.getCosts().getCols()) && + "Edge cost dimensions do not match node costs dimensions."); + ne.setNode1AEItr(n1.addEdge(edgeItr)); + ne.setNode2AEItr(n2.addEdge(edgeItr)); + return edgeItr; + } + + inline void copyFrom(const Graph &other); + public: + + /// \brief Construct an empty PBQP graph. + Graph() : numNodes(0), numEdges(0) {} + + /// \brief Copy construct this graph from "other". Note: Does not copy node + /// and edge data, only graph structure and costs. + /// @param other Source graph to copy from. + Graph(const Graph &other) : numNodes(0), numEdges(0) { + copyFrom(other); + } + + /// \brief Make this graph a copy of "other". Note: Does not copy node and + /// edge data, only graph structure and costs. + /// @param other The graph to copy from. + /// @return A reference to this graph. + /// + /// This will clear the current graph, erasing any nodes and edges added, + /// before copying from other. + Graph& operator=(const Graph &other) { + clear(); + copyFrom(other); + return *this; + } + + /// \brief Add a node with the given costs. + /// @param costs Cost vector for the new node. + /// @return Node iterator for the added node. + NodeItr addNode(const Vector &costs) { + return addConstructedNode(NodeEntry(costs)); + } + + /// \brief Add an edge between the given nodes with the given costs. + /// @param n1Itr First node. + /// @param n2Itr Second node. + /// @return Edge iterator for the added edge. + EdgeItr addEdge(Graph::NodeItr n1Itr, Graph::NodeItr n2Itr, + const Matrix &costs) { + assert(getNodeCosts(n1Itr).getLength() == costs.getRows() && + getNodeCosts(n2Itr).getLength() == costs.getCols() && + "Matrix dimensions mismatch."); + return addConstructedEdge(EdgeEntry(n1Itr, n2Itr, costs)); + } + + /// \brief Get the number of nodes in the graph. + /// @return Number of nodes in the graph. + unsigned getNumNodes() const { return numNodes; } + + /// \brief Get the number of edges in the graph. + /// @return Number of edges in the graph. + unsigned getNumEdges() const { return numEdges; } + + /// \brief Get a node's cost vector. + /// @param nItr Node iterator. + /// @return Node cost vector. + Vector& getNodeCosts(NodeItr nItr) { return getNode(nItr).getCosts(); } + + /// \brief Get a node's cost vector (const version). + /// @param nItr Node iterator. + /// @return Node cost vector. + const Vector& getNodeCosts(ConstNodeItr nItr) const { + return getNode(nItr).getCosts(); + } + + /// \brief Set a node's data pointer. + /// @param nItr Node iterator. + /// @param data Pointer to node data. + /// + /// Typically used by a PBQP solver to attach data to aid in solution. 
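As a usage sketch of the interface defined so far (assuming the Vector and Matrix constructors from Math.h take a size/shape plus a fill value):

PBQP::Graph g;
PBQP::Vector nodeCosts(2, 0.0);              // two options, zero node cost
PBQP::Graph::NodeItr n1 = g.addNode(nodeCosts);
PBQP::Graph::NodeItr n2 = g.addNode(nodeCosts);
PBQP::Matrix conflict(2, 2, 0.0);
conflict[0][0] = conflict[1][1] = 1.0;       // penalize matching choices
g.addEdge(n1, n2, conflict);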
+ void setNodeData(NodeItr nItr, void *data) { getNode(nItr).setData(data); } + + /// \brief Get the node's data pointer. + /// @param nItr Node iterator. + /// @return Pointer to node data. + void* getNodeData(NodeItr nItr) { return getNode(nItr).getData(); } + + /// \brief Get an edge's cost matrix. + /// @param eItr Edge iterator. + /// @return Edge cost matrix. + Matrix& getEdgeCosts(EdgeItr eItr) { return getEdge(eItr).getCosts(); } + + /// \brief Get an edge's cost matrix (const version). + /// @param eItr Edge iterator. + /// @return Edge cost matrix. + const Matrix& getEdgeCosts(ConstEdgeItr eItr) const { + return getEdge(eItr).getCosts(); + } + + /// \brief Set an edge's data pointer. + /// @param eItr Edge iterator. + /// @param data Pointer to edge data. + /// + /// Typically used by a PBQP solver to attach data to aid in solution. + void setEdgeData(EdgeItr eItr, void *data) { getEdge(eItr).setData(data); } + + /// \brief Get an edge's data pointer. + /// @param eItr Edge iterator. + /// @return Pointer to edge data. + void* getEdgeData(EdgeItr eItr) { return getEdge(eItr).getData(); } + + /// \brief Get a node's degree. + /// @param nItr Node iterator. + /// @return The degree of the node. + unsigned getNodeDegree(NodeItr nItr) const { + return getNode(nItr).getDegree(); + } + + /// \brief Begin iterator for node set. + NodeItr nodesBegin() { return nodes.begin(); } + + /// \brief Begin const iterator for node set. + ConstNodeItr nodesBegin() const { return nodes.begin(); } + + /// \brief End iterator for node set. + NodeItr nodesEnd() { return nodes.end(); } + + /// \brief End const iterator for node set. + ConstNodeItr nodesEnd() const { return nodes.end(); } + + /// \brief Begin iterator for edge set. + EdgeItr edgesBegin() { return edges.begin(); } + + /// \brief End iterator for edge set. + EdgeItr edgesEnd() { return edges.end(); } + + /// \brief Get begin iterator for adjacent edge set. + /// @param nItr Node iterator. + /// @return Begin iterator for the set of edges connected to the given node. + AdjEdgeItr adjEdgesBegin(NodeItr nItr) { + return getNode(nItr).edgesBegin(); + } + + /// \brief Get end iterator for adjacent edge set. + /// @param nItr Node iterator. + /// @return End iterator for the set of edges connected to the given node. + AdjEdgeItr adjEdgesEnd(NodeItr nItr) { + return getNode(nItr).edgesEnd(); + } + + /// \brief Get the first node connected to this edge. + /// @param eItr Edge iterator. + /// @return The first node connected to the given edge. + NodeItr getEdgeNode1(EdgeItr eItr) { + return getEdge(eItr).getNode1(); + } + + /// \brief Get the second node connected to this edge. + /// @param eItr Edge iterator. + /// @return The second node connected to the given edge. + NodeItr getEdgeNode2(EdgeItr eItr) { + return getEdge(eItr).getNode2(); + } + + /// \brief Get the "other" node connected to this edge. + /// @param eItr Edge iterator. + /// @param nItr Node iterator for the "given" node. + /// @return The iterator for the "other" node connected to this edge. + NodeItr getEdgeOtherNode(EdgeItr eItr, NodeItr nItr) { + EdgeEntry &e = getEdge(eItr); + if (e.getNode1() == nItr) { + return e.getNode2(); + } // else + return e.getNode1(); + } + + /// \brief Get the edge connecting two nodes. + /// @param n1Itr First node iterator. + /// @param n2Itr Second node iterator. + /// @return An iterator for edge (n1Itr, n2Itr) if such an edge exists, + /// otherwise returns edgesEnd(). 
+    EdgeItr findEdge(NodeItr n1Itr, NodeItr n2Itr) {
+      for (AdjEdgeItr aeItr = adjEdgesBegin(n1Itr), aeEnd = adjEdgesEnd(n1Itr);
+           aeItr != aeEnd; ++aeItr) {
+        if ((getEdgeNode1(*aeItr) == n2Itr) ||
+            (getEdgeNode2(*aeItr) == n2Itr)) {
+          return *aeItr;
+        }
+      }
+      return edges.end();
+    }
+
+    /// \brief Remove a node from the graph.
+    /// @param nItr Node iterator.
+    void removeNode(NodeItr nItr) {
+      NodeEntry &n = getNode(nItr);
+      for (AdjEdgeItr itr = n.edgesBegin(), end = n.edgesEnd(); itr != end;) {
+        EdgeItr eItr = *itr;
+        ++itr;
+        removeEdge(eItr);
+      }
+      nodes.erase(nItr);
+      --numNodes;
+    }
+
+    /// \brief Remove an edge from the graph.
+    /// @param eItr Edge iterator.
+    void removeEdge(EdgeItr eItr) {
+      EdgeEntry &e = getEdge(eItr);
+      NodeEntry &n1 = getNode(e.getNode1());
+      NodeEntry &n2 = getNode(e.getNode2());
+      n1.removeEdge(e.getNode1AEItr());
+      n2.removeEdge(e.getNode2AEItr());
+      edges.erase(eItr);
+      --numEdges;
+    }
+
+    /// \brief Remove all nodes and edges from the graph.
+    void clear() {
+      nodes.clear();
+      edges.clear();
+      numNodes = numEdges = 0;
+    }
+
+    /// \brief Print a representation of this graph in DOT format.
+    /// @param os Output stream to print on.
+    template <typename OStream>
+    void printDot(OStream &os) {
+
+      os << "graph {\n";
+
+      for (NodeItr nodeItr = nodesBegin(), nodeEnd = nodesEnd();
+           nodeItr != nodeEnd; ++nodeItr) {
+
+        // Node addresses are used as stable printable identifiers (the list
+        // iterators themselves are not streamable).
+        os << "  node" << &*nodeItr << " [ label=\""
+           << &*nodeItr << ": " << getNodeCosts(nodeItr) << "\" ]\n";
+      }
+
+      os << "  edge [ len=" << getNumNodes() << " ]\n";
+
+      for (EdgeItr edgeItr = edgesBegin(), edgeEnd = edgesEnd();
+           edgeItr != edgeEnd; ++edgeItr) {
+
+        os << "  node" << &*getEdgeNode1(edgeItr)
+           << " -- node" << &*getEdgeNode2(edgeItr)
+           << " [ label=\"";
+
+        const Matrix &edgeCosts = getEdgeCosts(edgeItr);
+
+        for (unsigned i = 0; i < edgeCosts.getRows(); ++i) {
+          os << edgeCosts.getRowAsVector(i) << "\\n";
+        }
+        os << "\" ]\n";
+      }
+      os << "}\n";
+    }
+
+  };
+
+  class NodeItrComparator {
+  public:
+    bool operator()(Graph::NodeItr n1, Graph::NodeItr n2) const {
+      return &*n1 < &*n2;
+    }
+
+    bool operator()(Graph::ConstNodeItr n1, Graph::ConstNodeItr n2) const {
+      return &*n1 < &*n2;
+    }
+  };
+
+  class EdgeItrComparator {
+  public:
+    bool operator()(Graph::EdgeItr e1, Graph::EdgeItr e2) const {
+      return &*e1 < &*e2;
+    }
+
+    bool operator()(Graph::ConstEdgeItr e1, Graph::ConstEdgeItr e2) const {
+      return &*e1 < &*e2;
+    }
+  };
+
+  void Graph::copyFrom(const Graph &other) {
+    std::map<Graph::ConstNodeItr, Graph::NodeItr,
+             NodeItrComparator> nodeMap;
+
+    for (Graph::ConstNodeItr nItr = other.nodesBegin(),
+                             nEnd = other.nodesEnd();
+         nItr != nEnd; ++nItr) {
+      nodeMap[nItr] = addNode(other.getNodeCosts(nItr));
+    }
+
+    // Copy the edges too, otherwise only the node set would survive the
+    // copy. The edge endpoint accessors have no const overloads, so cast
+    // away constness for the traversal ("other" is not modified).
+    Graph &ncOther = const_cast<Graph&>(other);
+    for (Graph::EdgeItr eItr = ncOther.edgesBegin(), eEnd = ncOther.edgesEnd();
+         eItr != eEnd; ++eItr) {
+      addEdge(nodeMap[ncOther.getEdgeNode1(eItr)],
+              nodeMap[ncOther.getEdgeNode2(eItr)],
+              ncOther.getEdgeCosts(eItr));
+    }
+  }
+
+}
+
+#endif // LLVM_CODEGEN_PBQP_GRAPH_HPP
diff --git a/lib/CodeGen/PBQP/GraphBase.h b/lib/CodeGen/PBQP/GraphBase.h
deleted file mode 100644
index becd98a..0000000
--- a/lib/CodeGen/PBQP/GraphBase.h
+++ /dev/null
@@ -1,582 +0,0 @@
-//===-- GraphBase.h - Abstract Base PBQP Graph ------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Base class for PBQP Graphs.
-// -//===----------------------------------------------------------------------===// - - -#ifndef LLVM_CODEGEN_PBQP_GRAPHBASE_H -#define LLVM_CODEGEN_PBQP_GRAPHBASE_H - -#include "PBQPMath.h" - -#include <list> -#include <vector> - -namespace PBQP { - -// UGLY, but I'm not sure there's a good way around this: We need to be able to -// look up a Node's "adjacent edge list" structure type before the Node type is -// fully constructed. We can enable this by pushing the choice of data type -// out into this traits class. -template <typename Graph> -class NodeBaseTraits { - public: - typedef std::list<typename Graph::EdgeIterator> AdjEdgeList; - typedef typename AdjEdgeList::iterator AdjEdgeIterator; - typedef typename AdjEdgeList::const_iterator ConstAdjEdgeIterator; -}; - -/// \brief Base for concrete graph classes. Provides a basic set of graph -/// operations which are useful for PBQP solvers. -template <typename NodeEntry, typename EdgeEntry> -class GraphBase { -private: - - typedef GraphBase<NodeEntry, EdgeEntry> ThisGraphT; - - typedef std::list<NodeEntry> NodeList; - typedef std::list<EdgeEntry> EdgeList; - - NodeList nodeList; - unsigned nodeListSize; - - EdgeList edgeList; - unsigned edgeListSize; - - GraphBase(const ThisGraphT &other) { abort(); } - void operator=(const ThisGraphT &other) { abort(); } - -public: - - /// \brief Iterates over the nodes of a graph. - typedef typename NodeList::iterator NodeIterator; - /// \brief Iterates over the nodes of a const graph. - typedef typename NodeList::const_iterator ConstNodeIterator; - /// \brief Iterates over the edges of a graph. - typedef typename EdgeList::iterator EdgeIterator; - /// \brief Iterates over the edges of a const graph. - typedef typename EdgeList::const_iterator ConstEdgeIterator; - - /// \brief Iterates over the edges attached to a node. - typedef typename NodeBaseTraits<ThisGraphT>::AdjEdgeIterator - AdjEdgeIterator; - - /// \brief Iterates over the edges attached to a node in a const graph. - typedef typename NodeBaseTraits<ThisGraphT>::ConstAdjEdgeIterator - ConstAdjEdgeIterator; - -private: - - typedef std::vector<NodeIterator> IDToNodeMap; - - IDToNodeMap idToNodeMap; - bool nodeIDsValid; - - void invalidateNodeIDs() { - if (nodeIDsValid) { - idToNodeMap.clear(); - nodeIDsValid = false; - } - } - - template <typename ItrT> - bool iteratorInRange(ItrT itr, const ItrT &begin, const ItrT &end) { - for (ItrT t = begin; t != end; ++t) { - if (itr == t) - return true; - } - - return false; - } - -protected: - - GraphBase() : nodeListSize(0), edgeListSize(0), nodeIDsValid(false) {} - - NodeEntry& getNodeEntry(const NodeIterator &nodeItr) { return *nodeItr; } - const NodeEntry& getNodeEntry(const ConstNodeIterator &nodeItr) const { - return *nodeItr; - } - - EdgeEntry& getEdgeEntry(const EdgeIterator &edgeItr) { return *edgeItr; } - const EdgeEntry& getEdgeEntry(const ConstEdgeIterator &edgeItr) const { - return *edgeItr; - } - - NodeIterator addConstructedNode(const NodeEntry &nodeEntry) { - ++nodeListSize; - - invalidateNodeIDs(); - - NodeIterator newNodeItr = nodeList.insert(nodeList.end(), nodeEntry); - - return newNodeItr; - } - - EdgeIterator addConstructedEdge(const EdgeEntry &edgeEntry) { - - assert((findEdge(edgeEntry.getNode1Itr(), edgeEntry.getNode2Itr()) - == edgeList.end()) && "Attempt to add duplicate edge."); - - ++edgeListSize; - - // Add the edge to the graph. - EdgeIterator edgeItr = edgeList.insert(edgeList.end(), edgeEntry); - - // Get a reference to the version in the graph. 
- EdgeEntry &newEdgeEntry = getEdgeEntry(edgeItr); - - // Node entries: - NodeEntry &node1Entry = getNodeEntry(newEdgeEntry.getNode1Itr()), - &node2Entry = getNodeEntry(newEdgeEntry.getNode2Itr()); - - // Sanity check on matrix dimensions. - assert((node1Entry.getCosts().getLength() == - newEdgeEntry.getCosts().getRows()) && - (node2Entry.getCosts().getLength() == - newEdgeEntry.getCosts().getCols()) && - "Matrix dimensions do not match cost vector dimensions."); - - // Create links between nodes and edges. - newEdgeEntry.setNode1ThisEdgeItr( - node1Entry.addAdjEdge(edgeItr)); - newEdgeEntry.setNode2ThisEdgeItr( - node2Entry.addAdjEdge(edgeItr)); - - return edgeItr; - } - -public: - - /// \brief Returns the number of nodes in this graph. - unsigned getNumNodes() const { return nodeListSize; } - - /// \brief Returns the number of edges in this graph. - unsigned getNumEdges() const { return edgeListSize; } - - /// \brief Return the cost vector for the given node. - Vector& getNodeCosts(const NodeIterator &nodeItr) { - return getNodeEntry(nodeItr).getCosts(); - } - - /// \brief Return the cost vector for the give node. - const Vector& getNodeCosts(const ConstNodeIterator &nodeItr) const { - return getNodeEntry(nodeItr).getCosts(); - } - - /// \brief Return the degree of the given node. - unsigned getNodeDegree(const NodeIterator &nodeItr) const { - return getNodeEntry(nodeItr).getDegree(); - } - - /// \brief Assigns sequential IDs to the nodes, starting at 0, which - /// remain valid until the next addition or removal of a node. - void assignNodeIDs() { - unsigned curID = 0; - idToNodeMap.resize(getNumNodes()); - for (NodeIterator nodeItr = nodesBegin(), nodeEnd = nodesEnd(); - nodeItr != nodeEnd; ++nodeItr, ++curID) { - getNodeEntry(nodeItr).setID(curID); - idToNodeMap[curID] = nodeItr; - } - nodeIDsValid = true; - } - - /// \brief Assigns sequential IDs to the nodes using the ordering of the - /// given vector. - void assignNodeIDs(const std::vector<NodeIterator> &nodeOrdering) { - assert((getNumNodes() == nodeOrdering.size()) && - "Wrong number of nodes in node ordering."); - idToNodeMap = nodeOrdering; - for (unsigned nodeID = 0; nodeID < idToNodeMap.size(); ++nodeID) { - getNodeEntry(idToNodeMap[nodeID]).setID(nodeID); - } - nodeIDsValid = true; - } - - /// \brief Returns true if valid node IDs are assigned, false otherwise. - bool areNodeIDsValid() const { return nodeIDsValid; } - - /// \brief Return the numeric ID of the given node. - /// - /// Calls to this method will result in an assertion failure if there have - /// been any node additions or removals since the last call to - /// assignNodeIDs(). - unsigned getNodeID(const ConstNodeIterator &nodeItr) const { - assert(nodeIDsValid && "Attempt to retrieve invalid ID."); - return getNodeEntry(nodeItr).getID(); - } - - /// \brief Returns the iterator associated with the given node ID. - NodeIterator getNodeItr(unsigned nodeID) { - assert(nodeIDsValid && "Attempt to retrieve iterator with invalid ID."); - return idToNodeMap[nodeID]; - } - - /// \brief Returns the iterator associated with the given node ID. - ConstNodeIterator getNodeItr(unsigned nodeID) const { - assert(nodeIDsValid && "Attempt to retrieve iterator with invalid ID."); - return idToNodeMap[nodeID]; - } - - /// \brief Removes the given node (and all attached edges) from the graph. 
- void removeNode(const NodeIterator &nodeItr) { - assert(iteratorInRange(nodeItr, nodeList.begin(), nodeList.end()) && - "Iterator does not belong to this graph!"); - - invalidateNodeIDs(); - - NodeEntry &nodeEntry = getNodeEntry(nodeItr); - - // We need to copy this out because it will be destroyed as the edges are - // removed. - typedef std::vector<EdgeIterator> AdjEdgeList; - typedef typename AdjEdgeList::iterator AdjEdgeListItr; - - AdjEdgeList adjEdges; - adjEdges.reserve(nodeEntry.getDegree()); - std::copy(nodeEntry.adjEdgesBegin(), nodeEntry.adjEdgesEnd(), - std::back_inserter(adjEdges)); - - // Iterate over the copied out edges and remove them from the graph. - for (AdjEdgeListItr itr = adjEdges.begin(), end = adjEdges.end(); - itr != end; ++itr) { - removeEdge(*itr); - } - - // Erase the node from the nodelist. - nodeList.erase(nodeItr); - --nodeListSize; - } - - NodeIterator nodesBegin() { return nodeList.begin(); } - ConstNodeIterator nodesBegin() const { return nodeList.begin(); } - NodeIterator nodesEnd() { return nodeList.end(); } - ConstNodeIterator nodesEnd() const { return nodeList.end(); } - - AdjEdgeIterator adjEdgesBegin(const NodeIterator &nodeItr) { - return getNodeEntry(nodeItr).adjEdgesBegin(); - } - - ConstAdjEdgeIterator adjEdgesBegin(const ConstNodeIterator &nodeItr) const { - return getNodeEntry(nodeItr).adjEdgesBegin(); - } - - AdjEdgeIterator adjEdgesEnd(const NodeIterator &nodeItr) { - return getNodeEntry(nodeItr).adjEdgesEnd(); - } - - ConstAdjEdgeIterator adjEdgesEnd(const ConstNodeIterator &nodeItr) const { - getNodeEntry(nodeItr).adjEdgesEnd(); - } - - EdgeIterator findEdge(const NodeIterator &node1Itr, - const NodeIterator &node2Itr) { - - for (AdjEdgeIterator adjEdgeItr = adjEdgesBegin(node1Itr), - adjEdgeEnd = adjEdgesEnd(node1Itr); - adjEdgeItr != adjEdgeEnd; ++adjEdgeItr) { - if ((getEdgeNode1Itr(*adjEdgeItr) == node2Itr) || - (getEdgeNode2Itr(*adjEdgeItr) == node2Itr)) { - return *adjEdgeItr; - } - } - - return edgeList.end(); - } - - ConstEdgeIterator findEdge(const ConstNodeIterator &node1Itr, - const ConstNodeIterator &node2Itr) const { - - for (ConstAdjEdgeIterator adjEdgeItr = adjEdgesBegin(node1Itr), - adjEdgeEnd = adjEdgesEnd(node1Itr); - adjEdgeItr != adjEdgeEnd; ++adjEdgeItr) { - if ((getEdgeNode1Itr(*adjEdgeItr) == node2Itr) || - (getEdgeNode2Itr(*adjEdgeItr) == node2Itr)) { - return *adjEdgeItr; - } - } - - return edgeList.end(); - } - - Matrix& getEdgeCosts(const EdgeIterator &edgeItr) { - return getEdgeEntry(edgeItr).getCosts(); - } - - const Matrix& getEdgeCosts(const ConstEdgeIterator &edgeItr) const { - return getEdgeEntry(edgeItr).getCosts(); - } - - NodeIterator getEdgeNode1Itr(const EdgeIterator &edgeItr) { - return getEdgeEntry(edgeItr).getNode1Itr(); - } - - ConstNodeIterator getEdgeNode1Itr(const ConstEdgeIterator &edgeItr) const { - return getEdgeEntry(edgeItr).getNode1Itr(); - } - - NodeIterator getEdgeNode2Itr(const EdgeIterator &edgeItr) { - return getEdgeEntry(edgeItr).getNode2Itr(); - } - - ConstNodeIterator getEdgeNode2Itr(const ConstEdgeIterator &edgeItr) const { - return getEdgeEntry(edgeItr).getNode2Itr(); - } - - NodeIterator getEdgeOtherNode(const EdgeIterator &edgeItr, - const NodeIterator &nodeItr) { - - EdgeEntry &edgeEntry = getEdgeEntry(edgeItr); - if (nodeItr == edgeEntry.getNode1Itr()) { - return edgeEntry.getNode2Itr(); - } - //else - return edgeEntry.getNode1Itr(); - } - - ConstNodeIterator getEdgeOtherNode(const ConstEdgeIterator &edgeItr, - const ConstNodeIterator &nodeItr) const { - - const EdgeEntry 
&edgeEntry = getEdgeEntry(edgeItr); - if (nodeItr == edgeEntry.getNode1Itr()) { - return edgeEntry.getNode2Itr(); - } - //else - return edgeEntry.getNode1Itr(); - } - - void removeEdge(const EdgeIterator &edgeItr) { - assert(iteratorInRange(edgeItr, edgeList.begin(), edgeList.end()) && - "Iterator does not belong to this graph!"); - - --edgeListSize; - - // Get the edge entry. - EdgeEntry &edgeEntry = getEdgeEntry(edgeItr); - - // Get the nodes entry. - NodeEntry &node1Entry(getNodeEntry(edgeEntry.getNode1Itr())), - &node2Entry(getNodeEntry(edgeEntry.getNode2Itr())); - - // Disconnect the edge from the nodes. - node1Entry.removeAdjEdge(edgeEntry.getNode1ThisEdgeItr()); - node2Entry.removeAdjEdge(edgeEntry.getNode2ThisEdgeItr()); - - // Remove the edge from the graph. - edgeList.erase(edgeItr); - } - - EdgeIterator edgesBegin() { return edgeList.begin(); } - ConstEdgeIterator edgesBegin() const { return edgeList.begin(); } - EdgeIterator edgesEnd() { return edgeList.end(); } - ConstEdgeIterator edgesEnd() const { return edgeList.end(); } - - void clear() { - nodeList.clear(); - nodeListSize = 0; - edgeList.clear(); - edgeListSize = 0; - idToNodeMap.clear(); - } - - template <typename OStream> - void printDot(OStream &os) const { - - assert(areNodeIDsValid() && - "Cannot print a .dot of a graph unless IDs have been assigned."); - - os << "graph {\n"; - - for (ConstNodeIterator nodeItr = nodesBegin(), nodeEnd = nodesEnd(); - nodeItr != nodeEnd; ++nodeItr) { - - os << " node" << getNodeID(nodeItr) << " [ label=\"" - << getNodeID(nodeItr) << ": " << getNodeCosts(nodeItr) << "\" ]\n"; - } - - os << " edge [ len=" << getNumNodes() << " ]\n"; - - for (ConstEdgeIterator edgeItr = edgesBegin(), edgeEnd = edgesEnd(); - edgeItr != edgeEnd; ++edgeItr) { - - os << " node" << getNodeID(getEdgeNode1Itr(edgeItr)) - << " -- node" << getNodeID(getEdgeNode2Itr(edgeItr)) - << " [ label=\""; - - const Matrix &edgeCosts = getEdgeCosts(edgeItr); - - for (unsigned i = 0; i < edgeCosts.getRows(); ++i) { - os << edgeCosts.getRowAsVector(i) << "\\n"; - } - - os << "\" ]\n"; - } - - os << "}\n"; - } - - template <typename OStream> - void printDot(OStream &os) { - if (!areNodeIDsValid()) { - assignNodeIDs(); - } - - const_cast<const ThisGraphT*>(this)->printDot(os); - } - - template <typename OStream> - void dumpTo(OStream &os) const { - typedef ConstNodeIterator ConstNodeID; - - assert(areNodeIDsValid() && - "Cannot dump a graph unless IDs have been assigned."); - - for (ConstNodeIterator nItr = nodesBegin(), nEnd = nodesEnd(); - nItr != nEnd; ++nItr) { - os << getNodeID(nItr) << "\n"; - } - - unsigned edgeNumber = 1; - for (ConstEdgeIterator eItr = edgesBegin(), eEnd = edgesEnd(); - eItr != eEnd; ++eItr) { - - os << edgeNumber++ << ": { " - << getNodeID(getEdgeNode1Itr(eItr)) << ", " - << getNodeID(getEdgeNode2Itr(eItr)) << " }\n"; - } - - } - - template <typename OStream> - void dumpTo(OStream &os) { - if (!areNodeIDsValid()) { - assignNodeIDs(); - } - - const_cast<const ThisGraphT*>(this)->dumpTo(os); - } - -}; - -/// \brief Provides a base from which to derive nodes for GraphBase. 
-template <typename NodeImpl, typename EdgeImpl>
-class NodeBase {
-private:
-
-  typedef GraphBase<NodeImpl, EdgeImpl> GraphBaseT;
-  typedef NodeBaseTraits<GraphBaseT> ThisNodeBaseTraits;
-
-public:
-  typedef typename GraphBaseT::EdgeIterator EdgeIterator;
-
-private:
-  typedef typename ThisNodeBaseTraits::AdjEdgeList AdjEdgeList;
-
-  unsigned degree, id;
-  Vector costs;
-  AdjEdgeList adjEdges;
-
-  void operator=(const NodeBase& other) {
-    assert(false && "Can't assign NodeEntrys.");
-  }
-
-public:
-
-  typedef typename ThisNodeBaseTraits::AdjEdgeIterator AdjEdgeIterator;
-  typedef typename ThisNodeBaseTraits::ConstAdjEdgeIterator
-    ConstAdjEdgeIterator;
-
-  NodeBase(const Vector &costs) : degree(0), costs(costs) {
-    assert((costs.getLength() > 0) && "Can't have zero-length cost vector.");
-  }
-
-  Vector& getCosts() { return costs; }
-  const Vector& getCosts() const { return costs; }
-
-  unsigned getDegree() const { return degree; }
-
-  void setID(unsigned id) { this->id = id; }
-  unsigned getID() const { return id; }
-
-  AdjEdgeIterator addAdjEdge(const EdgeIterator &edgeItr) {
-    ++degree;
-    return adjEdges.insert(adjEdges.end(), edgeItr);
-  }
-
-  void removeAdjEdge(const AdjEdgeIterator &adjEdgeItr) {
-    --degree;
-    adjEdges.erase(adjEdgeItr);
-  }
-
-  AdjEdgeIterator adjEdgesBegin() { return adjEdges.begin(); }
-  ConstAdjEdgeIterator adjEdgesBegin() const { return adjEdges.begin(); }
-  AdjEdgeIterator adjEdgesEnd() { return adjEdges.end(); }
-  ConstAdjEdgeIterator adjEdgesEnd() const { return adjEdges.end(); }
-
-};
-
-template <typename NodeImpl, typename EdgeImpl>
-class EdgeBase {
-public:
-  typedef typename GraphBase<NodeImpl, EdgeImpl>::NodeIterator NodeIterator;
-  typedef typename GraphBase<NodeImpl, EdgeImpl>::EdgeIterator EdgeIterator;
-
-  typedef typename NodeImpl::AdjEdgeIterator NodeAdjEdgeIterator;
-
-private:
-
-  NodeIterator node1Itr, node2Itr;
-  NodeAdjEdgeIterator node1ThisEdgeItr, node2ThisEdgeItr;
-  Matrix costs;
-
-  void operator=(const EdgeBase &other) {
-    assert(false && "Can't assign EdgeEntrys.");
-  }
-
-public:
-
-  EdgeBase(const NodeIterator &node1Itr, const NodeIterator &node2Itr,
-           const Matrix &costs) :
-    node1Itr(node1Itr), node2Itr(node2Itr), costs(costs) {
-
-    assert((costs.getRows() > 0) && (costs.getCols() > 0) &&
-           "Can't have zero-dimensioned cost matrices");
-  }
-
-  Matrix& getCosts() { return costs; }
-  const Matrix& getCosts() const { return costs; }
-
-  const NodeIterator& getNode1Itr() const { return node1Itr; }
-  const NodeIterator& getNode2Itr() const { return node2Itr; }
-
-  void setNode1ThisEdgeItr(const NodeAdjEdgeIterator &node1ThisEdgeItr) {
-    this->node1ThisEdgeItr = node1ThisEdgeItr;
-  }
-
-  const NodeAdjEdgeIterator& getNode1ThisEdgeItr() const {
-    return node1ThisEdgeItr;
-  }
-
-  void setNode2ThisEdgeItr(const NodeAdjEdgeIterator &node2ThisEdgeItr) {
-    this->node2ThisEdgeItr = node2ThisEdgeItr;
-  }
-
-  const NodeAdjEdgeIterator& getNode2ThisEdgeItr() const {
-    return node2ThisEdgeItr;
-  }
-
-};
-
-
-}
-
-#endif // LLVM_CODEGEN_PBQP_GRAPHBASE_HPP
diff --git a/lib/CodeGen/PBQP/HeuristicBase.h b/lib/CodeGen/PBQP/HeuristicBase.h
new file mode 100644
index 0000000..3bb24e1
--- /dev/null
+++ b/lib/CodeGen/PBQP/HeuristicBase.h
@@ -0,0 +1,242 @@
+//===-- HeuristicBase.h - Heuristic base class for PBQP ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PBQP_HEURISTICBASE_H
+#define LLVM_CODEGEN_PBQP_HEURISTICBASE_H
+
+#include "HeuristicSolver.h"
+
+namespace PBQP {
+
+  /// \brief Abstract base class for heuristic implementations.
+  ///
+  /// This class provides a handy base for heuristic implementations, with
+  /// common solver behaviour already implemented for a number of methods.
+  ///
+  /// To implement your own heuristic using this class as a base you'll have to
+  /// implement, as a minimum, the following methods:
+  /// <ul>
+  ///   <li> void addToHeuristicReduceList(Graph::NodeItr) : Add a node to the
+  ///        heuristic reduction list.
+  ///   <li> bool heuristicReduce() : Perform a single heuristic reduction,
+  ///        returning true if a reduction was performed.
+  ///   <li> void preUpdateEdgeCosts(Graph::EdgeItr) : Handle the (imminent)
+  ///        change to the cost matrix on the given edge (by R2).
+  ///   <li> void postUpdateEdgeCosts(Graph::EdgeItr) : Handle the new
+  ///        costs on the given edge.
+  ///   <li> void handleAddEdge(Graph::EdgeItr) : Handle the addition of a new
+  ///        edge into the PBQP graph (by R2).
+  ///   <li> void handleRemoveEdge(Graph::EdgeItr, Graph::NodeItr) : Handle the
+  ///        disconnection of the given edge from the given node.
+  ///   <li> A constructor for your derived class : to pass back a reference to
+  ///        the solver which is using this heuristic.
+  /// </ul>
+  ///
+  /// These methods are implemented in this class for documentation purposes,
+  /// but will assert if called.
+  ///
+  /// Note that this class uses the curiously recurring template idiom to
+  /// forward calls to the derived class. These methods need not be made
+  /// virtual, and indeed probably shouldn't for performance reasons.
+  ///
+  /// You'll also need to provide NodeData and EdgeData structs in your class.
+  /// These can be used to attach data relevant to your heuristic to each
+  /// node/edge in the PBQP graph.
+
+  template <typename HImpl>
+  class HeuristicBase {
+  private:
+
+    typedef std::list<Graph::NodeItr> OptimalList;
+
+    HeuristicSolverImpl<HImpl> &s;
+    Graph &g;
+    OptimalList optimalList;
+
+    // Return a reference to the derived heuristic.
+    HImpl& impl() { return static_cast<HImpl&>(*this); }
+
+    // Add the given node to the optimal reductions list.
+    void addToOptimalReductionList(Graph::NodeItr nItr) {
+      optimalList.insert(optimalList.end(), nItr);
+    }
+
+  public:
+
+    /// \brief Construct an instance with a reference to the given solver.
+    /// @param solver The solver which is using this heuristic instance.
+    HeuristicBase(HeuristicSolverImpl<HImpl> &solver)
+      : s(solver), g(s.getGraph()) { }
+
+    /// \brief Get the solver which is using this heuristic instance.
+    /// @return The solver which is using this heuristic instance.
+    ///
+    /// You can use this method to get access to the solver in your derived
+    /// heuristic implementation.
+    HeuristicSolverImpl<HImpl>& getSolver() { return s; }
+
+    /// \brief Get the graph representing the problem to be solved.
+    /// @return The graph representing the problem to be solved.
+    Graph& getGraph() { return g; }
+
+    /// \brief Tell the solver to simplify the graph before the reduction phase.
+    /// @return Whether or not the solver should run a simplification phase
+    ///         prior to the main setup and reduction.
+    ///
+    /// HeuristicBase returns true from this method as it's a sensible default,
+    /// however you can over-ride it in your derived class if you want different
+    /// behaviour.
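+    ///
+    /// For example, a derived heuristic that wanted to skip the
+    /// simplification phase could over-ride it as follows (MyHeuristic is
+    /// hypothetical):
+    /// \code
+    ///   class MyHeuristic : public HeuristicBase<MyHeuristic> {
+    ///   public:
+    ///     bool solverRunSimplify() const { return false; }
+    ///     // ... remaining heuristic methods ...
+    ///   };
+    /// \endcode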
+    bool solverRunSimplify() const { return true; }
+
+    /// \brief Decide whether a node should be optimally or heuristically
+    ///        reduced.
+    /// @return Whether or not the given node should be listed for optimal
+    ///         reduction (via R0, R1 or R2).
+    ///
+    /// HeuristicBase returns true for any node with degree less than 3. This
+    /// is sane and sensible for many situations, but not all. You can
+    /// over-ride this method in your derived class if you want different
+    /// selection criteria. Note, however, that your criteria for selecting
+    /// optimal nodes should be <i>at least</i> as strong as this, i.e. nodes
+    /// of degree 3 or higher should not be selected under any circumstances.
+    bool shouldOptimallyReduce(Graph::NodeItr nItr) {
+      return g.getNodeDegree(nItr) < 3;
+    }
+
+    /// \brief Add the given node to the list of nodes to be optimally reduced.
+    /// @param nItr Node iterator to be added.
+    ///
+    /// You probably don't want to over-ride this, except perhaps to record
+    /// statistics before calling this implementation. HeuristicBase relies on
+    /// its behaviour.
+    void addToOptimalReduceList(Graph::NodeItr nItr) {
+      optimalList.push_back(nItr);
+    }
+
+    /// \brief Initialise the heuristic.
+    ///
+    /// HeuristicBase iterates over all nodes in the problem and adds them to
+    /// the appropriate list using addToOptimalReduceList or
+    /// addToHeuristicReduceList based on the result of shouldOptimallyReduce.
+    ///
+    /// This behaviour should be fine for most situations.
+    void setup() {
+      for (Graph::NodeItr nItr = g.nodesBegin(), nEnd = g.nodesEnd();
+           nItr != nEnd; ++nItr) {
+        if (impl().shouldOptimallyReduce(nItr)) {
+          addToOptimalReduceList(nItr);
+        } else {
+          impl().addToHeuristicReduceList(nItr);
+        }
+      }
+    }
+
+    /// \brief Optimally reduce one of the nodes in the optimal reduce list.
+    /// @return True if a reduction takes place, false if the optimal reduce
+    ///         list is empty.
+    ///
+    /// Selects a node from the optimal reduce list and removes it, applying
+    /// R0, R1 or R2 as appropriate based on the selected node's degree.
+    bool optimalReduce() {
+      if (optimalList.empty())
+        return false;
+
+      Graph::NodeItr nItr = optimalList.front();
+      optimalList.pop_front();
+
+      switch (s.getSolverDegree(nItr)) {
+        case 0: s.applyR0(nItr); break;
+        case 1: s.applyR1(nItr); break;
+        case 2: s.applyR2(nItr); break;
+        default: assert(false &&
+                        "Optimal reductions of degree > 2 nodes are invalid.");
+      }
+
+      return true;
+    }
+
+    /// \brief Perform the PBQP reduction process.
+    ///
+    /// Reduces the problem to the empty graph by repeated application of the
+    /// reduction rules R0, R1, R2 and RN.
+    /// R0, R1 or R2 are always applied if possible before RN is used.
+    void reduce() {
+      bool finished = false;
+
+      while (!finished) {
+        if (!optimalReduce())
+          if (!impl().heuristicReduce())
+            finished = true;
+      }
+    }
+
+    /// \brief Add a node to the heuristic reduce list.
+    /// @param nItr Node iterator to add to the heuristic reduce list.
+    void addToHeuristicReduceList(Graph::NodeItr nItr) {
+      assert(false && "Must be implemented in derived class.");
+    }
+
+    /// \brief Heuristically reduce one of the nodes in the heuristic
+    ///        reduce list.
+    /// @return True if a reduction takes place, false if the heuristic reduce
+    ///         list is empty.
+    bool heuristicReduce() {
+      assert(false && "Must be implemented in derived class.");
+      return false;
+    }
+
+    /// \brief Prepare a change in the costs on the given edge.
+    /// @param eItr Edge iterator.
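+    ///
+    /// The solver calls this immediately before an R2 application adds a
+    /// delta matrix into the cost matrix of an existing edge, giving the
+    /// heuristic a chance to tear down any data which depends on the old
+    /// costs.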
+    void preUpdateEdgeCosts(Graph::EdgeItr eItr) {
+      assert(false && "Must be implemented in derived class.");
+    }
+
+    /// \brief Handle the change in the costs on the given edge.
+    /// @param eItr Edge iterator.
+    void postUpdateEdgeCosts(Graph::EdgeItr eItr) {
+      assert(false && "Must be implemented in derived class.");
+    }
+
+    /// \brief Handle the addition of a new edge into the PBQP graph.
+    /// @param eItr Edge iterator for the added edge.
+    void handleAddEdge(Graph::EdgeItr eItr) {
+      assert(false && "Must be implemented in derived class.");
+    }
+
+    /// \brief Handle disconnection of an edge from a node.
+    /// @param eItr Edge iterator for edge being disconnected.
+    /// @param nItr Node iterator for the node being disconnected from.
+    ///
+    /// Edges are frequently removed due to the removal of a node. This
+    /// method allows for the effect to be computed only for the remaining
+    /// node in the graph.
+    void handleRemoveEdge(Graph::EdgeItr eItr, Graph::NodeItr nItr) {
+      assert(false && "Must be implemented in derived class.");
+    }
+
+    /// \brief Clean up any structures used by HeuristicBase.
+    ///
+    /// At present this just performs a sanity check: that the optimal reduce
+    /// list is empty now that reduction has completed.
+    ///
+    /// If your derived class has more complex structures which need tearing
+    /// down you should over-ride this method but include a call back to this
+    /// implementation.
+    void cleanup() {
+      assert(optimalList.empty() && "Nodes left over in optimal reduce list?");
+    }
+
+  };
+
+}
+
+
+#endif // LLVM_CODEGEN_PBQP_HEURISTICBASE_H
diff --git a/lib/CodeGen/PBQP/HeuristicSolver.h b/lib/CodeGen/PBQP/HeuristicSolver.h
index f78a58a..c156264 100644
--- a/lib/CodeGen/PBQP/HeuristicSolver.h
+++ b/lib/CodeGen/PBQP/HeuristicSolver.h
@@ -1,4 +1,4 @@
-//===-- HeuristicSolver.h - Heuristic PBQP Solver ---------------*- C++ -*-===//
+//===-- HeuristicSolver.h - Heuristic PBQP Solver --------------*- C++ -*-===//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -9,780 +9,598 @@
 //
 // Heuristic PBQP solver. This solver is able to perform optimal reductions for
 // nodes of degree 0, 1 or 2. For nodes of degree >2 a plugable heuristic is
-// used to to select a node for reduction.
+// used to select a node for reduction.
 //
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_CODEGEN_PBQP_HEURISTICSOLVER_H
 #define LLVM_CODEGEN_PBQP_HEURISTICSOLVER_H
 
-#include "Solver.h"
-#include "AnnotatedGraph.h"
-#include "llvm/Support/raw_ostream.h"
+#include "Graph.h"
+#include "Solution.h"
+#include <vector>
 #include <limits>
 
 namespace PBQP {
 
-/// \brief Important types for the HeuristicSolverImpl.
-///
-/// Declared seperately to allow access to heuristic classes before the solver
-/// is fully constructed.
-template <typename HeuristicNodeData, typename HeuristicEdgeData>
-class HSITypes {
-public:
-
-  class NodeData;
-  class EdgeData;
-
-  typedef AnnotatedGraph<NodeData, EdgeData> SolverGraph;
-  typedef typename SolverGraph::NodeIterator GraphNodeIterator;
-  typedef typename SolverGraph::EdgeIterator GraphEdgeIterator;
-  typedef typename SolverGraph::AdjEdgeIterator GraphAdjEdgeIterator;
-
-  typedef std::list<GraphNodeIterator> NodeList;
-  typedef typename NodeList::iterator NodeListIterator;
-
-  typedef std::vector<GraphNodeIterator> NodeStack;
-  typedef typename NodeStack::iterator NodeStackIterator;
-
-  class NodeData {
-    friend class EdgeData;
-
+  /// \brief Heuristic PBQP solver implementation.
+ /// + /// This class should usually be created (and destroyed) indirectly via a call + /// to HeuristicSolver<HImpl>::solve(Graph&). + /// See the comments for HeuristicSolver. + /// + /// HeuristicSolverImpl provides the R0, R1 and R2 reduction rules, + /// backpropagation phase, and maintains the internal copy of the graph on + /// which the reduction is carried out (the original being kept to facilitate + /// backpropagation). + template <typename HImpl> + class HeuristicSolverImpl { private: - typedef std::list<GraphEdgeIterator> LinksList; + typedef typename HImpl::NodeData HeuristicNodeData; + typedef typename HImpl::EdgeData HeuristicEdgeData; - unsigned numLinks; - LinksList links, solvedLinks; - NodeListIterator bucketItr; - HeuristicNodeData heuristicData; + typedef std::list<Graph::EdgeItr> SolverEdges; public: - - typedef typename LinksList::iterator AdjLinkIterator; + + /// \brief Iterator type for edges in the solver graph. + typedef SolverEdges::iterator SolverEdgeItr; private: - AdjLinkIterator addLink(const GraphEdgeIterator &edgeItr) { - ++numLinks; - return links.insert(links.end(), edgeItr); - } + class NodeData { + public: + NodeData() : solverDegree(0) {} - void delLink(const AdjLinkIterator &adjLinkItr) { - --numLinks; - links.erase(adjLinkItr); - } + HeuristicNodeData& getHeuristicData() { return hData; } - public: - - NodeData() : numLinks(0) {} - - unsigned getLinkDegree() const { return numLinks; } - - HeuristicNodeData& getHeuristicData() { return heuristicData; } - const HeuristicNodeData& getHeuristicData() const { - return heuristicData; - } + SolverEdgeItr addSolverEdge(Graph::EdgeItr eItr) { + ++solverDegree; + return solverEdges.insert(solverEdges.end(), eItr); + } - void setBucketItr(const NodeListIterator &bucketItr) { - this->bucketItr = bucketItr; - } + void removeSolverEdge(SolverEdgeItr seItr) { + --solverDegree; + solverEdges.erase(seItr); + } - const NodeListIterator& getBucketItr() const { - return bucketItr; - } + SolverEdgeItr solverEdgesBegin() { return solverEdges.begin(); } + SolverEdgeItr solverEdgesEnd() { return solverEdges.end(); } + unsigned getSolverDegree() const { return solverDegree; } + void clearSolverEdges() { + solverDegree = 0; + solverEdges.clear(); + } + + private: + HeuristicNodeData hData; + unsigned solverDegree; + SolverEdges solverEdges; + }; + + class EdgeData { + public: + HeuristicEdgeData& getHeuristicData() { return hData; } + + void setN1SolverEdgeItr(SolverEdgeItr n1SolverEdgeItr) { + this->n1SolverEdgeItr = n1SolverEdgeItr; + } - AdjLinkIterator adjLinksBegin() { - return links.begin(); - } + SolverEdgeItr getN1SolverEdgeItr() { return n1SolverEdgeItr; } - AdjLinkIterator adjLinksEnd() { - return links.end(); - } + void setN2SolverEdgeItr(SolverEdgeItr n2SolverEdgeItr){ + this->n2SolverEdgeItr = n2SolverEdgeItr; + } - void addSolvedLink(const GraphEdgeIterator &solvedLinkItr) { - solvedLinks.push_back(solvedLinkItr); - } + SolverEdgeItr getN2SolverEdgeItr() { return n2SolverEdgeItr; } - AdjLinkIterator solvedLinksBegin() { - return solvedLinks.begin(); - } + private: - AdjLinkIterator solvedLinksEnd() { - return solvedLinks.end(); - } + HeuristicEdgeData hData; + SolverEdgeItr n1SolverEdgeItr, n2SolverEdgeItr; + }; - }; + Graph &g; + HImpl h; + Solution s; + std::vector<Graph::NodeItr> stack; - class EdgeData { - private: + typedef std::list<NodeData> NodeDataList; + NodeDataList nodeDataList; - SolverGraph &g; - GraphNodeIterator node1Itr, node2Itr; - HeuristicEdgeData heuristicData; - typename 
NodeData::AdjLinkIterator node1ThisEdgeItr, node2ThisEdgeItr; + typedef std::list<EdgeData> EdgeDataList; + EdgeDataList edgeDataList; public: - EdgeData(SolverGraph &g) : g(g) {} - - HeuristicEdgeData& getHeuristicData() { return heuristicData; } - const HeuristicEdgeData& getHeuristicData() const { - return heuristicData; - } - - void setup(const GraphEdgeIterator &thisEdgeItr) { - node1Itr = g.getEdgeNode1Itr(thisEdgeItr); - node2Itr = g.getEdgeNode2Itr(thisEdgeItr); - - node1ThisEdgeItr = g.getNodeData(node1Itr).addLink(thisEdgeItr); - node2ThisEdgeItr = g.getNodeData(node2Itr).addLink(thisEdgeItr); - } - - void unlink() { - g.getNodeData(node1Itr).delLink(node1ThisEdgeItr); - g.getNodeData(node2Itr).delLink(node2ThisEdgeItr); - } - - }; - -}; - -template <typename Heuristic> -class HeuristicSolverImpl { -public: - // Typedefs to make life easier: - typedef HSITypes<typename Heuristic::NodeData, - typename Heuristic::EdgeData> HSIT; - typedef typename HSIT::SolverGraph SolverGraph; - typedef typename HSIT::NodeData NodeData; - typedef typename HSIT::EdgeData EdgeData; - typedef typename HSIT::GraphNodeIterator GraphNodeIterator; - typedef typename HSIT::GraphEdgeIterator GraphEdgeIterator; - typedef typename HSIT::GraphAdjEdgeIterator GraphAdjEdgeIterator; - - typedef typename HSIT::NodeList NodeList; - typedef typename HSIT::NodeListIterator NodeListIterator; - - typedef std::vector<GraphNodeIterator> NodeStack; - typedef typename NodeStack::iterator NodeStackIterator; - - /// \brief Constructor, which performs all the actual solver work. - HeuristicSolverImpl(const SimpleGraph &orig) : - solution(orig.getNumNodes(), true) - { - copyGraph(orig); - simplify(); - setup(); - computeSolution(); - computeSolutionCost(orig); - } - - /// \brief Returns the graph for this solver. - SolverGraph& getGraph() { return g; } - - /// \brief Return the solution found by this solver. - const Solution& getSolution() const { return solution; } - -private: - - /// \brief Add the given node to the appropriate bucket for its link - /// degree. - void addToBucket(const GraphNodeIterator &nodeItr) { - NodeData &nodeData = g.getNodeData(nodeItr); - - switch (nodeData.getLinkDegree()) { - case 0: nodeData.setBucketItr( - r0Bucket.insert(r0Bucket.end(), nodeItr)); - break; - case 1: nodeData.setBucketItr( - r1Bucket.insert(r1Bucket.end(), nodeItr)); - break; - case 2: nodeData.setBucketItr( - r2Bucket.insert(r2Bucket.end(), nodeItr)); - break; - default: heuristic.addToRNBucket(nodeItr); - break; - } - } - - /// \brief Remove the given node from the appropriate bucket for its link - /// degree. - void removeFromBucket(const GraphNodeIterator &nodeItr) { - NodeData &nodeData = g.getNodeData(nodeItr); - - switch (nodeData.getLinkDegree()) { - case 0: r0Bucket.erase(nodeData.getBucketItr()); break; - case 1: r1Bucket.erase(nodeData.getBucketItr()); break; - case 2: r2Bucket.erase(nodeData.getBucketItr()); break; - default: heuristic.removeFromRNBucket(nodeItr); break; - } - } - -public: - - /// \brief Add a link. - void addLink(const GraphEdgeIterator &edgeItr) { - g.getEdgeData(edgeItr).setup(edgeItr); - - if ((g.getNodeData(g.getEdgeNode1Itr(edgeItr)).getLinkDegree() > 2) || - (g.getNodeData(g.getEdgeNode2Itr(edgeItr)).getLinkDegree() > 2)) { - heuristic.handleAddLink(edgeItr); - } - } - - /// \brief Remove link, update info for node. - /// - /// Only updates information for the given node, since usually the other - /// is about to be removed. 
-  void removeLink(const GraphEdgeIterator &edgeItr,
-                  const GraphNodeIterator &nodeItr) {
-
-    if (g.getNodeData(nodeItr).getLinkDegree() > 2) {
-      heuristic.handleRemoveLink(edgeItr, nodeItr);
-    }
-    g.getEdgeData(edgeItr).unlink();
-  }
-
-  /// \brief Remove link, update info for both nodes. Useful for R2 only.
-  void removeLinkR2(const GraphEdgeIterator &edgeItr) {
-    GraphNodeIterator node1Itr = g.getEdgeNode1Itr(edgeItr);
-
-    if (g.getNodeData(node1Itr).getLinkDegree() > 2) {
-      heuristic.handleRemoveLink(edgeItr, node1Itr);
+    /// \brief Construct a heuristic solver implementation to solve the given
+    ///        graph.
+    /// @param g The graph representing the problem instance to be solved.
+    HeuristicSolverImpl(Graph &g) : g(g), h(*this) {}
+
+    /// \brief Get the graph being solved by this solver.
+    /// @return The graph representing the problem instance being solved by
+    ///         this solver.
+    Graph& getGraph() { return g; }
+
+    /// \brief Get the heuristic data attached to the given node.
+    /// @param nItr Node iterator.
+    /// @return The heuristic data attached to the given node.
+    HeuristicNodeData& getHeuristicNodeData(Graph::NodeItr nItr) {
+      return getSolverNodeData(nItr).getHeuristicData();
+    }
+
+    /// \brief Get the heuristic data attached to the given edge.
+    /// @param eItr Edge iterator.
+    /// @return The heuristic data attached to the given edge.
+    HeuristicEdgeData& getHeuristicEdgeData(Graph::EdgeItr eItr) {
+      return getSolverEdgeData(eItr).getHeuristicData();
+    }
+
+    /// \brief Begin iterator for the set of edges adjacent to the given node
+    ///        in the solver graph.
+    /// @param nItr Node iterator.
+    /// @return Begin iterator for the set of edges adjacent to the given node
+    ///         in the solver graph.
+    SolverEdgeItr solverEdgesBegin(Graph::NodeItr nItr) {
+      return getSolverNodeData(nItr).solverEdgesBegin();
+    }
+
+    /// \brief End iterator for the set of edges adjacent to the given node in
+    ///        the solver graph.
+    /// @param nItr Node iterator.
+    /// @return End iterator for the set of edges adjacent to the given node in
+    ///         the solver graph.
+    SolverEdgeItr solverEdgesEnd(Graph::NodeItr nItr) {
+      return getSolverNodeData(nItr).solverEdgesEnd();
+    }
+
+    /// \brief Remove an edge from the solver graph.
+    /// @param eItr Edge iterator for the edge to be removed.
+    ///
+    /// Does <i>not</i> notify the heuristic of the removal. That should be
+    /// done manually if necessary.
+    void removeSolverEdge(Graph::EdgeItr eItr) {
+      EdgeData &eData = getSolverEdgeData(eItr);
+      NodeData &n1Data = getSolverNodeData(g.getEdgeNode1(eItr)),
+               &n2Data = getSolverNodeData(g.getEdgeNode2(eItr));
+
+      n1Data.removeSolverEdge(eData.getN1SolverEdgeItr());
+      n2Data.removeSolverEdge(eData.getN2SolverEdgeItr());
+    }
+
+    /// \brief Compute a solution to the PBQP problem instance with which this
+    ///        heuristic solver was constructed.
+    /// @return A solution to the PBQP problem.
+    ///
+    /// Performs the full PBQP heuristic solver algorithm, including setup,
+    /// calls to the heuristic (which will call back to the reduction rules in
+    /// this class), and cleanup.
+    Solution computeSolution() {
+      setup();
+      h.setup();
+      h.reduce();
+      backpropagate();
+      // cleanup() also invokes h.cleanup() to tear down the heuristic.
+      cleanup();
+      return s;
+    }
+
+    /// \brief Add a node to the end of the reduction stack.
+    /// @param nItr Node iterator to add to the reduction stack.
+    void pushToStack(Graph::NodeItr nItr) {
+      getSolverNodeData(nItr).clearSolverEdges();
+      stack.push_back(nItr);
+    }
+
+    /// \brief Returns the solver degree of the given node.
+ /// @param nItr Node iterator for which degree is requested. + /// @return Node degree in the <i>solver</i> graph (not the original graph). + unsigned getSolverDegree(Graph::NodeItr nItr) { + return getSolverNodeData(nItr).getSolverDegree(); + } + + /// \brief Set the solution of the given node. + /// @param nItr Node iterator to set solution for. + /// @param selection Selection for node. + void setSolution(const Graph::NodeItr &nItr, unsigned selection) { + s.setSelection(nItr, selection); + + for (Graph::AdjEdgeItr aeItr = g.adjEdgesBegin(nItr), + aeEnd = g.adjEdgesEnd(nItr); + aeItr != aeEnd; ++aeItr) { + Graph::EdgeItr eItr(*aeItr); + Graph::NodeItr anItr(g.getEdgeOtherNode(eItr, nItr)); + getSolverNodeData(anItr).addSolverEdge(eItr); + } } - removeLink(edgeItr, g.getEdgeNode2Itr(edgeItr)); - } - - /// \brief Removes all links connected to the given node. - void unlinkNode(const GraphNodeIterator &nodeItr) { - NodeData &nodeData = g.getNodeData(nodeItr); - - typedef std::vector<GraphEdgeIterator> TempEdgeList; - TempEdgeList edgesToUnlink; - edgesToUnlink.reserve(nodeData.getLinkDegree()); + /// \brief Apply rule R0. + /// @param nItr Node iterator for node to apply R0 to. + /// + /// Node will be automatically pushed to the solver stack. + void applyR0(Graph::NodeItr nItr) { + assert(getSolverNodeData(nItr).getSolverDegree() == 0 && + "R0 applied to node with degree != 0."); - // Copy adj edges into a temp vector. We want to destroy them during - // the unlink, and we can't do that while we're iterating over them. - std::copy(nodeData.adjLinksBegin(), nodeData.adjLinksEnd(), - std::back_inserter(edgesToUnlink)); - - for (typename TempEdgeList::iterator - edgeItr = edgesToUnlink.begin(), edgeEnd = edgesToUnlink.end(); - edgeItr != edgeEnd; ++edgeItr) { - - GraphNodeIterator otherNode = g.getEdgeOtherNode(*edgeItr, nodeItr); - - removeFromBucket(otherNode); - removeLink(*edgeItr, otherNode); - addToBucket(otherNode); + // Nothing to do. Just push the node onto the reduction stack. + pushToStack(nItr); } - } - - /// \brief Push the given node onto the stack to be solved with - /// backpropagation. - void pushStack(const GraphNodeIterator &nodeItr) { - stack.push_back(nodeItr); - } - - /// \brief Set the solution of the given node. - void setSolution(const GraphNodeIterator &nodeItr, unsigned solIndex) { - solution.setSelection(g.getNodeID(nodeItr), solIndex); - - for (GraphAdjEdgeIterator adjEdgeItr = g.adjEdgesBegin(nodeItr), - adjEdgeEnd = g.adjEdgesEnd(nodeItr); - adjEdgeItr != adjEdgeEnd; ++adjEdgeItr) { - GraphEdgeIterator edgeItr(*adjEdgeItr); - GraphNodeIterator adjNodeItr(g.getEdgeOtherNode(edgeItr, nodeItr)); - g.getNodeData(adjNodeItr).addSolvedLink(edgeItr); - } - } - -private: - SolverGraph g; - Heuristic heuristic; - Solution solution; + /// \brief Apply rule R1. + /// @param nItr Node iterator for node to apply R1 to. + /// + /// Node will be automatically pushed to the solver stack. + void applyR1(Graph::NodeItr xnItr) { + NodeData &nd = getSolverNodeData(xnItr); + assert(nd.getSolverDegree() == 1 && + "R1 applied to node with degree != 1."); - NodeList r0Bucket, - r1Bucket, - r2Bucket; + Graph::EdgeItr eItr = *nd.solverEdgesBegin(); - NodeStack stack; - - // Copy the SimpleGraph into an annotated graph which we can use for reduction. 
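+      // R1: x has exactly one neighbour y. Fold x's costs into y's cost
+      // vector, y[j] += min_i (edge[i][j] + x[i]), after which x can be
+      // removed and solved by backpropagation.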
- void copyGraph(const SimpleGraph &orig) { - - assert((g.getNumEdges() == 0) && (g.getNumNodes() == 0) && - "Graph should be empty prior to solver setup."); - - assert(orig.areNodeIDsValid() && - "Cannot copy from a graph with invalid node IDs."); - - std::vector<GraphNodeIterator> newNodeItrs; - - for (unsigned nodeID = 0; nodeID < orig.getNumNodes(); ++nodeID) { - newNodeItrs.push_back( - g.addNode(orig.getNodeCosts(orig.getNodeItr(nodeID)), NodeData())); + const Matrix &eCosts = g.getEdgeCosts(eItr); + const Vector &xCosts = g.getNodeCosts(xnItr); + + // Duplicate a little to avoid transposing matrices. + if (xnItr == g.getEdgeNode1(eItr)) { + Graph::NodeItr ynItr = g.getEdgeNode2(eItr); + Vector &yCosts = g.getNodeCosts(ynItr); + for (unsigned j = 0; j < yCosts.getLength(); ++j) { + PBQPNum min = eCosts[0][j] + xCosts[0]; + for (unsigned i = 1; i < xCosts.getLength(); ++i) { + PBQPNum c = eCosts[i][j] + xCosts[i]; + if (c < min) + min = c; + } + yCosts[j] += min; + } + h.handleRemoveEdge(eItr, ynItr); + } else { + Graph::NodeItr ynItr = g.getEdgeNode1(eItr); + Vector &yCosts = g.getNodeCosts(ynItr); + for (unsigned i = 0; i < yCosts.getLength(); ++i) { + PBQPNum min = eCosts[i][0] + xCosts[0]; + for (unsigned j = 1; j < xCosts.getLength(); ++j) { + PBQPNum c = eCosts[i][j] + xCosts[j]; + if (c < min) + min = c; + } + yCosts[i] += min; + } + h.handleRemoveEdge(eItr, ynItr); + } + removeSolverEdge(eItr); + assert(nd.getSolverDegree() == 0 && + "Degree 1 with edge removed should be 0."); + pushToStack(xnItr); } - for (SimpleGraph::ConstEdgeIterator - origEdgeItr = orig.edgesBegin(), origEdgeEnd = orig.edgesEnd(); - origEdgeItr != origEdgeEnd; ++origEdgeItr) { - - unsigned id1 = orig.getNodeID(orig.getEdgeNode1Itr(origEdgeItr)), - id2 = orig.getNodeID(orig.getEdgeNode2Itr(origEdgeItr)); + /// \brief Apply rule R2. + /// @param nItr Node iterator for node to apply R2 to. + /// + /// Node will be automatically pushed to the solver stack. + void applyR2(Graph::NodeItr xnItr) { + assert(getSolverNodeData(xnItr).getSolverDegree() == 2 && + "R2 applied to node with degree != 2."); - g.addEdge(newNodeItrs[id1], newNodeItrs[id2], - orig.getEdgeCosts(origEdgeItr), EdgeData(g)); - } + NodeData &nd = getSolverNodeData(xnItr); + const Vector &xCosts = g.getNodeCosts(xnItr); - // Assign IDs to the new nodes using the ordering from the old graph, - // this will lead to nodes in the new graph getting the same ID as the - // corresponding node in the old graph. - g.assignNodeIDs(newNodeItrs); - } + SolverEdgeItr aeItr = nd.solverEdgesBegin(); + Graph::EdgeItr yxeItr = *aeItr, + zxeItr = *(++aeItr); - // Simplify the annotated graph by eliminating independent edges and trivial - // nodes. - void simplify() { - disconnectTrivialNodes(); - eliminateIndependentEdges(); - } + Graph::NodeItr ynItr = g.getEdgeOtherNode(yxeItr, xnItr), + znItr = g.getEdgeOtherNode(zxeItr, xnItr); - // Eliminate trivial nodes. - void disconnectTrivialNodes() { - for (GraphNodeIterator nodeItr = g.nodesBegin(), nodeEnd = g.nodesEnd(); - nodeItr != nodeEnd; ++nodeItr) { + bool flipEdge1 = (g.getEdgeNode1(yxeItr) == xnItr), + flipEdge2 = (g.getEdgeNode1(zxeItr) == xnItr); - if (g.getNodeCosts(nodeItr).getLength() == 1) { + const Matrix *yxeCosts = flipEdge1 ? + new Matrix(g.getEdgeCosts(yxeItr).transpose()) : + &g.getEdgeCosts(yxeItr); - std::vector<GraphEdgeIterator> edgesToRemove; + const Matrix *zxeCosts = flipEdge2 ? 
+ new Matrix(g.getEdgeCosts(zxeItr).transpose()) : + &g.getEdgeCosts(zxeItr); - for (GraphAdjEdgeIterator adjEdgeItr = g.adjEdgesBegin(nodeItr), - adjEdgeEnd = g.adjEdgesEnd(nodeItr); - adjEdgeItr != adjEdgeEnd; ++adjEdgeItr) { + unsigned xLen = xCosts.getLength(), + yLen = yxeCosts->getRows(), + zLen = zxeCosts->getRows(); + + Matrix delta(yLen, zLen); - GraphEdgeIterator edgeItr = *adjEdgeItr; - - if (g.getEdgeNode1Itr(edgeItr) == nodeItr) { - GraphNodeIterator otherNodeItr = g.getEdgeNode2Itr(edgeItr); - g.getNodeCosts(otherNodeItr) += - g.getEdgeCosts(edgeItr).getRowAsVector(0); - } - else { - GraphNodeIterator otherNodeItr = g.getEdgeNode1Itr(edgeItr); - g.getNodeCosts(otherNodeItr) += - g.getEdgeCosts(edgeItr).getColAsVector(0); + for (unsigned i = 0; i < yLen; ++i) { + for (unsigned j = 0; j < zLen; ++j) { + PBQPNum min = (*yxeCosts)[i][0] + (*zxeCosts)[j][0] + xCosts[0]; + for (unsigned k = 1; k < xLen; ++k) { + PBQPNum c = (*yxeCosts)[i][k] + (*zxeCosts)[j][k] + xCosts[k]; + if (c < min) { + min = c; + } } - - edgesToRemove.push_back(edgeItr); + delta[i][j] = min; } + } - while (!edgesToRemove.empty()) { - g.removeEdge(edgesToRemove.back()); - edgesToRemove.pop_back(); + if (flipEdge1) + delete yxeCosts; + + if (flipEdge2) + delete zxeCosts; + + Graph::EdgeItr yzeItr = g.findEdge(ynItr, znItr); + bool addedEdge = false; + + if (yzeItr == g.edgesEnd()) { + yzeItr = g.addEdge(ynItr, znItr, delta); + addedEdge = true; + } else { + Matrix &yzeCosts = g.getEdgeCosts(yzeItr); + h.preUpdateEdgeCosts(yzeItr); + if (ynItr == g.getEdgeNode1(yzeItr)) { + yzeCosts += delta; + } else { + yzeCosts += delta.transpose(); } } - } - } - void eliminateIndependentEdges() { - std::vector<GraphEdgeIterator> edgesToProcess; + bool nullCostEdge = tryNormaliseEdgeMatrix(yzeItr); - for (GraphEdgeIterator edgeItr = g.edgesBegin(), edgeEnd = g.edgesEnd(); - edgeItr != edgeEnd; ++edgeItr) { - edgesToProcess.push_back(edgeItr); - } - - while (!edgesToProcess.empty()) { - tryToEliminateEdge(edgesToProcess.back()); - edgesToProcess.pop_back(); - } - } - - void tryToEliminateEdge(const GraphEdgeIterator &edgeItr) { - if (tryNormaliseEdgeMatrix(edgeItr)) { - g.removeEdge(edgeItr); - } - } - - bool tryNormaliseEdgeMatrix(const GraphEdgeIterator &edgeItr) { - - Matrix &edgeCosts = g.getEdgeCosts(edgeItr); - Vector &uCosts = g.getNodeCosts(g.getEdgeNode1Itr(edgeItr)), - &vCosts = g.getNodeCosts(g.getEdgeNode2Itr(edgeItr)); - - for (unsigned r = 0; r < edgeCosts.getRows(); ++r) { - PBQPNum rowMin = edgeCosts.getRowMin(r); - uCosts[r] += rowMin; - if (rowMin != std::numeric_limits<PBQPNum>::infinity()) { - edgeCosts.subFromRow(r, rowMin); + if (!addedEdge) { + // If we modified the edge costs let the heuristic know. + h.postUpdateEdgeCosts(yzeItr); } - else { - edgeCosts.setRow(r, 0); + + if (nullCostEdge) { + // If this edge ended up null remove it. + if (!addedEdge) { + // We didn't just add it, so we need to notify the heuristic + // and remove it from the solver. + h.handleRemoveEdge(yzeItr, ynItr); + h.handleRemoveEdge(yzeItr, znItr); + removeSolverEdge(yzeItr); + } + g.removeEdge(yzeItr); + } else if (addedEdge) { + // If the edge was added, and non-null, finish setting it up, add it to + // the solver & notify heuristic. 
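+        // (The EdgeData object must outlive the edge: it is owned by
+        // edgeDataList, and the graph stores only a pointer to it.)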
+ edgeDataList.push_back(EdgeData()); + g.setEdgeData(yzeItr, &edgeDataList.back()); + addSolverEdge(yzeItr); + h.handleAddEdge(yzeItr); } - } - for (unsigned c = 0; c < edgeCosts.getCols(); ++c) { - PBQPNum colMin = edgeCosts.getColMin(c); - vCosts[c] += colMin; - if (colMin != std::numeric_limits<PBQPNum>::infinity()) { - edgeCosts.subFromCol(c, colMin); - } - else { - edgeCosts.setCol(c, 0); - } - } + h.handleRemoveEdge(yxeItr, ynItr); + removeSolverEdge(yxeItr); + h.handleRemoveEdge(zxeItr, znItr); + removeSolverEdge(zxeItr); - return edgeCosts.isZero(); - } + pushToStack(xnItr); + } - void setup() { - setupLinks(); - heuristic.initialise(*this); - setupBuckets(); - } + private: - void setupLinks() { - for (GraphEdgeIterator edgeItr = g.edgesBegin(), edgeEnd = g.edgesEnd(); - edgeItr != edgeEnd; ++edgeItr) { - g.getEdgeData(edgeItr).setup(edgeItr); + NodeData& getSolverNodeData(Graph::NodeItr nItr) { + return *static_cast<NodeData*>(g.getNodeData(nItr)); } - } - void setupBuckets() { - for (GraphNodeIterator nodeItr = g.nodesBegin(), nodeEnd = g.nodesEnd(); - nodeItr != nodeEnd; ++nodeItr) { - addToBucket(nodeItr); - } - } - - void computeSolution() { - assert(g.areNodeIDsValid() && - "Nodes cannot be added/removed during reduction."); - - reduce(); - computeTrivialSolutions(); - backpropagate(); - } - - void printNode(const GraphNodeIterator &nodeItr) { - llvm::errs() << "Node " << g.getNodeID(nodeItr) << " (" << &*nodeItr << "):\n" - << " costs = " << g.getNodeCosts(nodeItr) << "\n" - << " link degree = " << g.getNodeData(nodeItr).getLinkDegree() << "\n" - << " links = [ "; - - for (typename HSIT::NodeData::AdjLinkIterator - aeItr = g.getNodeData(nodeItr).adjLinksBegin(), - aeEnd = g.getNodeData(nodeItr).adjLinksEnd(); - aeItr != aeEnd; ++aeItr) { - llvm::errs() << "(" << g.getNodeID(g.getEdgeNode1Itr(*aeItr)) - << ", " << g.getNodeID(g.getEdgeNode2Itr(*aeItr)) - << ") "; + EdgeData& getSolverEdgeData(Graph::EdgeItr eItr) { + return *static_cast<EdgeData*>(g.getEdgeData(eItr)); } - llvm::errs() << "]\n"; - } - void dumpState() { - llvm::errs() << "\n"; + void addSolverEdge(Graph::EdgeItr eItr) { + EdgeData &eData = getSolverEdgeData(eItr); + NodeData &n1Data = getSolverNodeData(g.getEdgeNode1(eItr)), + &n2Data = getSolverNodeData(g.getEdgeNode2(eItr)); - for (GraphNodeIterator nodeItr = g.nodesBegin(), nodeEnd = g.nodesEnd(); - nodeItr != nodeEnd; ++nodeItr) { - printNode(nodeItr); + eData.setN1SolverEdgeItr(n1Data.addSolverEdge(eItr)); + eData.setN2SolverEdgeItr(n2Data.addSolverEdge(eItr)); } - NodeList* buckets[] = { &r0Bucket, &r1Bucket, &r2Bucket }; - - for (unsigned b = 0; b < 3; ++b) { - NodeList &bucket = *buckets[b]; - - llvm::errs() << "Bucket " << b << ": [ "; - - for (NodeListIterator nItr = bucket.begin(), nEnd = bucket.end(); - nItr != nEnd; ++nItr) { - llvm::errs() << g.getNodeID(*nItr) << " "; + void setup() { + if (h.solverRunSimplify()) { + simplify(); } - llvm::errs() << "]\n"; - } + // Create node data objects. + for (Graph::NodeItr nItr = g.nodesBegin(), nEnd = g.nodesEnd(); + nItr != nEnd; ++nItr) { + nodeDataList.push_back(NodeData()); + g.setNodeData(nItr, &nodeDataList.back()); + } - llvm::errs() << "Stack: [ "; - for (NodeStackIterator nsItr = stack.begin(), nsEnd = stack.end(); - nsItr != nsEnd; ++nsItr) { - llvm::errs() << g.getNodeID(*nsItr) << " "; + // Create edge data objects. 
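+      // (addSolverEdge also registers the edge in the solver adjacency
+      // lists of both endpoint nodes.)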
+ for (Graph::EdgeItr eItr = g.edgesBegin(), eEnd = g.edgesEnd(); + eItr != eEnd; ++eItr) { + edgeDataList.push_back(EdgeData()); + g.setEdgeData(eItr, &edgeDataList.back()); + addSolverEdge(eItr); + } } - llvm::errs() << "]\n"; - } - - void reduce() { - bool reductionFinished = r1Bucket.empty() && r2Bucket.empty() && - heuristic.rNBucketEmpty(); - while (!reductionFinished) { - - if (!r1Bucket.empty()) { - processR1(); - } - else if (!r2Bucket.empty()) { - processR2(); - } - else if (!heuristic.rNBucketEmpty()) { - solution.setProvedOptimal(false); - solution.incRNReductions(); - heuristic.processRN(); - } - else reductionFinished = true; + void simplify() { + disconnectTrivialNodes(); + eliminateIndependentEdges(); } - - } - void processR1() { + // Eliminate trivial nodes. + void disconnectTrivialNodes() { + unsigned numDisconnected = 0; - // Remove the first node in the R0 bucket: - GraphNodeIterator xNodeItr = r1Bucket.front(); - r1Bucket.pop_front(); + for (Graph::NodeItr nItr = g.nodesBegin(), nEnd = g.nodesEnd(); + nItr != nEnd; ++nItr) { - solution.incR1Reductions(); + if (g.getNodeCosts(nItr).getLength() == 1) { - //llvm::errs() << "Applying R1 to " << g.getNodeID(xNodeItr) << "\n"; + std::vector<Graph::EdgeItr> edgesToRemove; - assert((g.getNodeData(xNodeItr).getLinkDegree() == 1) && - "Node in R1 bucket has degree != 1"); + for (Graph::AdjEdgeItr aeItr = g.adjEdgesBegin(nItr), + aeEnd = g.adjEdgesEnd(nItr); + aeItr != aeEnd; ++aeItr) { - GraphEdgeIterator edgeItr = *g.getNodeData(xNodeItr).adjLinksBegin(); + Graph::EdgeItr eItr = *aeItr; - const Matrix &edgeCosts = g.getEdgeCosts(edgeItr); + if (g.getEdgeNode1(eItr) == nItr) { + Graph::NodeItr otherNodeItr = g.getEdgeNode2(eItr); + g.getNodeCosts(otherNodeItr) += + g.getEdgeCosts(eItr).getRowAsVector(0); + } + else { + Graph::NodeItr otherNodeItr = g.getEdgeNode1(eItr); + g.getNodeCosts(otherNodeItr) += + g.getEdgeCosts(eItr).getColAsVector(0); + } - const Vector &xCosts = g.getNodeCosts(xNodeItr); - unsigned xLen = xCosts.getLength(); + edgesToRemove.push_back(eItr); + } - // Duplicate a little code to avoid transposing matrices: - if (xNodeItr == g.getEdgeNode1Itr(edgeItr)) { - GraphNodeIterator yNodeItr = g.getEdgeNode2Itr(edgeItr); - Vector &yCosts = g.getNodeCosts(yNodeItr); - unsigned yLen = yCosts.getLength(); + if (!edgesToRemove.empty()) + ++numDisconnected; - for (unsigned j = 0; j < yLen; ++j) { - PBQPNum min = edgeCosts[0][j] + xCosts[0]; - for (unsigned i = 1; i < xLen; ++i) { - PBQPNum c = edgeCosts[i][j] + xCosts[i]; - if (c < min) - min = c; + while (!edgesToRemove.empty()) { + g.removeEdge(edgesToRemove.back()); + edgesToRemove.pop_back(); + } } - yCosts[j] += min; } } - else { - GraphNodeIterator yNodeItr = g.getEdgeNode1Itr(edgeItr); - Vector &yCosts = g.getNodeCosts(yNodeItr); - unsigned yLen = yCosts.getLength(); - for (unsigned i = 0; i < yLen; ++i) { - PBQPNum min = edgeCosts[i][0] + xCosts[0]; + void eliminateIndependentEdges() { + std::vector<Graph::EdgeItr> edgesToProcess; + unsigned numEliminated = 0; - for (unsigned j = 1; j < xLen; ++j) { - PBQPNum c = edgeCosts[i][j] + xCosts[j]; - if (c < min) - min = c; - } - yCosts[i] += min; + for (Graph::EdgeItr eItr = g.edgesBegin(), eEnd = g.edgesEnd(); + eItr != eEnd; ++eItr) { + edgesToProcess.push_back(eItr); } - } - - unlinkNode(xNodeItr); - pushStack(xNodeItr); - } - - void processR2() { - - GraphNodeIterator xNodeItr = r2Bucket.front(); - r2Bucket.pop_front(); - - solution.incR2Reductions(); - - // Unlink is unsafe here. 
At some point it may optimistically more a node - // to a lower-degree list when its degree will later rise, or vice versa, - // violating the assumption that node degrees monotonically decrease - // during the reduction phase. Instead we'll bucket shuffle manually. - pushStack(xNodeItr); - - assert((g.getNodeData(xNodeItr).getLinkDegree() == 2) && - "Node in R2 bucket has degree != 2"); - - const Vector &xCosts = g.getNodeCosts(xNodeItr); - typename NodeData::AdjLinkIterator tempItr = - g.getNodeData(xNodeItr).adjLinksBegin(); - - GraphEdgeIterator yxEdgeItr = *tempItr, - zxEdgeItr = *(++tempItr); + while (!edgesToProcess.empty()) { + if (tryToEliminateEdge(edgesToProcess.back())) + ++numEliminated; + edgesToProcess.pop_back(); + } + } - GraphNodeIterator yNodeItr = g.getEdgeOtherNode(yxEdgeItr, xNodeItr), - zNodeItr = g.getEdgeOtherNode(zxEdgeItr, xNodeItr); + bool tryToEliminateEdge(Graph::EdgeItr eItr) { + if (tryNormaliseEdgeMatrix(eItr)) { + g.removeEdge(eItr); + return true; + } + return false; + } - removeFromBucket(yNodeItr); - removeFromBucket(zNodeItr); + bool tryNormaliseEdgeMatrix(Graph::EdgeItr &eItr) { - removeLink(yxEdgeItr, yNodeItr); - removeLink(zxEdgeItr, zNodeItr); + const PBQPNum infinity = std::numeric_limits<PBQPNum>::infinity(); - // Graph some of the costs: - bool flipEdge1 = (g.getEdgeNode1Itr(yxEdgeItr) == xNodeItr), - flipEdge2 = (g.getEdgeNode1Itr(zxEdgeItr) == xNodeItr); + Matrix &edgeCosts = g.getEdgeCosts(eItr); + Vector &uCosts = g.getNodeCosts(g.getEdgeNode1(eItr)), + &vCosts = g.getNodeCosts(g.getEdgeNode2(eItr)); - const Matrix *yxCosts = flipEdge1 ? - new Matrix(g.getEdgeCosts(yxEdgeItr).transpose()) : - &g.getEdgeCosts(yxEdgeItr), - *zxCosts = flipEdge2 ? - new Matrix(g.getEdgeCosts(zxEdgeItr).transpose()) : - &g.getEdgeCosts(zxEdgeItr); + for (unsigned r = 0; r < edgeCosts.getRows(); ++r) { + PBQPNum rowMin = infinity; - unsigned xLen = xCosts.getLength(), - yLen = yxCosts->getRows(), - zLen = zxCosts->getRows(); + for (unsigned c = 0; c < edgeCosts.getCols(); ++c) { + if (vCosts[c] != infinity && edgeCosts[r][c] < rowMin) + rowMin = edgeCosts[r][c]; + } - // Compute delta: - Matrix delta(yLen, zLen); + uCosts[r] += rowMin; - for (unsigned i = 0; i < yLen; ++i) { - for (unsigned j = 0; j < zLen; ++j) { - PBQPNum min = (*yxCosts)[i][0] + (*zxCosts)[j][0] + xCosts[0]; - for (unsigned k = 1; k < xLen; ++k) { - PBQPNum c = (*yxCosts)[i][k] + (*zxCosts)[j][k] + xCosts[k]; - if (c < min) { - min = c; - } + if (rowMin != infinity) { + edgeCosts.subFromRow(r, rowMin); + } + else { + edgeCosts.setRow(r, 0); } - delta[i][j] = min; } - } - - if (flipEdge1) - delete yxCosts; - if (flipEdge2) - delete zxCosts; + for (unsigned c = 0; c < edgeCosts.getCols(); ++c) { + PBQPNum colMin = infinity; - // Deal with the potentially induced yz edge. - GraphEdgeIterator yzEdgeItr = g.findEdge(yNodeItr, zNodeItr); - if (yzEdgeItr == g.edgesEnd()) { - yzEdgeItr = g.addEdge(yNodeItr, zNodeItr, delta, EdgeData(g)); - } - else { - // There was an edge, but we're going to screw with it. Delete the old - // link, update the costs. We'll re-link it later. - removeLinkR2(yzEdgeItr); - g.getEdgeCosts(yzEdgeItr) += - (yNodeItr == g.getEdgeNode1Itr(yzEdgeItr)) ? - delta : delta.transpose(); - } + for (unsigned r = 0; r < edgeCosts.getRows(); ++r) { + if (uCosts[r] != infinity && edgeCosts[r][c] < colMin) + colMin = edgeCosts[r][c]; + } - bool nullCostEdge = tryNormaliseEdgeMatrix(yzEdgeItr); + vCosts[c] += colMin; - // Nulled the edge, remove it entirely. 
- if (nullCostEdge) { - g.removeEdge(yzEdgeItr); - } - else { - // Edge remains - re-link it. - addLink(yzEdgeItr); - } + if (colMin != infinity) { + edgeCosts.subFromCol(c, colMin); + } + else { + edgeCosts.setCol(c, 0); + } + } - addToBucket(yNodeItr); - addToBucket(zNodeItr); + return edgeCosts.isZero(); } - void computeTrivialSolutions() { - - for (NodeListIterator r0Itr = r0Bucket.begin(), r0End = r0Bucket.end(); - r0Itr != r0End; ++r0Itr) { - GraphNodeIterator nodeItr = *r0Itr; - - solution.incR0Reductions(); - setSolution(nodeItr, g.getNodeCosts(nodeItr).minIndex()); + void backpropagate() { + while (!stack.empty()) { + computeSolution(stack.back()); + stack.pop_back(); + } } - } - - void backpropagate() { - while (!stack.empty()) { - computeSolution(stack.back()); - stack.pop_back(); - } - } + void computeSolution(Graph::NodeItr nItr) { - void computeSolution(const GraphNodeIterator &nodeItr) { + NodeData &nodeData = getSolverNodeData(nItr); - NodeData &nodeData = g.getNodeData(nodeItr); + Vector v(g.getNodeCosts(nItr)); - Vector v(g.getNodeCosts(nodeItr)); + // Solve based on existing solved edges. + for (SolverEdgeItr solvedEdgeItr = nodeData.solverEdgesBegin(), + solvedEdgeEnd = nodeData.solverEdgesEnd(); + solvedEdgeItr != solvedEdgeEnd; ++solvedEdgeItr) { - // Solve based on existing links. - for (typename NodeData::AdjLinkIterator - solvedLinkItr = nodeData.solvedLinksBegin(), - solvedLinkEnd = nodeData.solvedLinksEnd(); - solvedLinkItr != solvedLinkEnd; ++solvedLinkItr) { + Graph::EdgeItr eItr(*solvedEdgeItr); + Matrix &edgeCosts = g.getEdgeCosts(eItr); - GraphEdgeIterator solvedEdgeItr(*solvedLinkItr); - Matrix &edgeCosts = g.getEdgeCosts(solvedEdgeItr); + if (nItr == g.getEdgeNode1(eItr)) { + Graph::NodeItr adjNode(g.getEdgeNode2(eItr)); + unsigned adjSolution = s.getSelection(adjNode); + v += edgeCosts.getColAsVector(adjSolution); + } + else { + Graph::NodeItr adjNode(g.getEdgeNode1(eItr)); + unsigned adjSolution = s.getSelection(adjNode); + v += edgeCosts.getRowAsVector(adjSolution); + } - if (nodeItr == g.getEdgeNode1Itr(solvedEdgeItr)) { - GraphNodeIterator adjNode(g.getEdgeNode2Itr(solvedEdgeItr)); - unsigned adjSolution = - solution.getSelection(g.getNodeID(adjNode)); - v += edgeCosts.getColAsVector(adjSolution); - } - else { - GraphNodeIterator adjNode(g.getEdgeNode1Itr(solvedEdgeItr)); - unsigned adjSolution = - solution.getSelection(g.getNodeID(adjNode)); - v += edgeCosts.getRowAsVector(adjSolution); } + setSolution(nItr, v.minIndex()); } - setSolution(nodeItr, v.minIndex()); - } - - void computeSolutionCost(const SimpleGraph &orig) { - PBQPNum cost = 0.0; - - for (SimpleGraph::ConstNodeIterator - nodeItr = orig.nodesBegin(), nodeEnd = orig.nodesEnd(); - nodeItr != nodeEnd; ++nodeItr) { - - unsigned nodeId = orig.getNodeID(nodeItr); - - cost += orig.getNodeCosts(nodeItr)[solution.getSelection(nodeId)]; + void cleanup() { + h.cleanup(); + nodeDataList.clear(); + edgeDataList.clear(); } + }; - for (SimpleGraph::ConstEdgeIterator - edgeItr = orig.edgesBegin(), edgeEnd = orig.edgesEnd(); - edgeItr != edgeEnd; ++edgeItr) { - - SimpleGraph::ConstNodeIterator n1 = orig.getEdgeNode1Itr(edgeItr), - n2 = orig.getEdgeNode2Itr(edgeItr); - unsigned sol1 = solution.getSelection(orig.getNodeID(n1)), - sol2 = solution.getSelection(orig.getNodeID(n2)); - - cost += orig.getEdgeCosts(edgeItr)[sol1][sol2]; + /// \brief PBQP heuristic solver class. 
+  ///
+  /// Given a PBQP Graph g representing a PBQP problem, you can find a solution
+  /// by calling
+  /// <tt>Solution s = HeuristicSolver<H>::solve(g);</tt>
+  ///
+  /// The choice of heuristic for the H parameter will affect both the solver
+  /// speed and solution quality. The heuristic should be chosen based on the
+  /// nature of the problem being solved.
+  /// Currently the only solver included with LLVM is the Briggs heuristic for
+  /// register allocation.
+  template <typename HImpl>
+  class HeuristicSolver {
+  public:
+    static Solution solve(Graph &g) {
+      HeuristicSolverImpl<HImpl> hs(g);
+      return hs.computeSolution();
+    }
-
-    solution.setSolutionCost(cost);
-  }
-
-};
-
-template <typename Heuristic>
-class HeuristicSolver : public Solver {
-public:
-  Solution solve(const SimpleGraph &g) const {
-    HeuristicSolverImpl<Heuristic> solverImpl(g);
-    return solverImpl.getSolution();
-  }
-};
+  };
 
 }
diff --git a/lib/CodeGen/PBQP/Heuristics/Briggs.h b/lib/CodeGen/PBQP/Heuristics/Briggs.h
index 1228f65..30d34d9 100644
--- a/lib/CodeGen/PBQP/Heuristics/Briggs.h
+++ b/lib/CodeGen/PBQP/Heuristics/Briggs.h
@@ -18,365 +18,447 @@
 #ifndef LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H
 #define LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H
 
+#include "llvm/Support/Compiler.h"
 #include "../HeuristicSolver.h"
+#include "../HeuristicBase.h"
 
 #include <set>
+#include <limits>
 
 namespace PBQP {
-namespace Heuristics {
-
-class Briggs {
-  public:
-
-    class NodeData;
-    class EdgeData;
-
-  private:
-
-    typedef HeuristicSolverImpl<Briggs> Solver;
-    typedef HSITypes<NodeData, EdgeData> HSIT;
-    typedef HSIT::SolverGraph SolverGraph;
-    typedef HSIT::GraphNodeIterator GraphNodeIterator;
-    typedef HSIT::GraphEdgeIterator GraphEdgeIterator;
-
-    class LinkDegreeComparator {
+  namespace Heuristics {
+
+    /// \brief PBQP Heuristic which applies an allocability test based on
+    /// Briggs.
+    ///
+    /// This heuristic assumes that the elements of cost vectors in the PBQP
+    /// problem represent storage options, with the first being the spill
+    /// option and subsequent elements representing legal registers for the
+    /// corresponding node. Edge cost matrices are likewise assumed to represent
+    /// register constraints.
+    /// If one or more nodes can be proven allocable by this heuristic (by
+    /// inspection of their constraint matrices) then the allocable node of
+    /// highest degree is selected for the next reduction and pushed to the
+    /// solver stack. If no nodes can be proven allocable then the node with
+    /// the lowest estimated spill cost is selected and pushed to the solver stack
+    /// instead.
+    ///
+    /// This implementation is built on top of HeuristicBase.
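+    ///
+    /// A minimal usage sketch (illustrative only; the populated graph g and
+    /// the node iterator nItr are assumptions, not part of this patch):
+    /// <tt>Solution s = HeuristicSolver<Heuristics::Briggs>::solve(g);</tt>
+    /// <tt>unsigned sel = s.getSelection(nItr);</tt>
+    /// Under the cost-vector convention above, a selection of 0 denotes the
+    /// spill option for the corresponding node.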
+ class Briggs : public HeuristicBase<Briggs> { + private: + + class LinkDegreeComparator { public: - LinkDegreeComparator() : g(0) {} - LinkDegreeComparator(SolverGraph *g) : g(g) {} - - bool operator()(const GraphNodeIterator &node1Itr, - const GraphNodeIterator &node2Itr) const { - assert((g != 0) && "Graph object not set, cannot access node data."); - unsigned n1Degree = g->getNodeData(node1Itr).getLinkDegree(), - n2Degree = g->getNodeData(node2Itr).getLinkDegree(); - if (n1Degree > n2Degree) { + LinkDegreeComparator(HeuristicSolverImpl<Briggs> &s) : s(&s) {} + bool operator()(Graph::NodeItr n1Itr, Graph::NodeItr n2Itr) const { + if (s->getSolverDegree(n1Itr) > s->getSolverDegree(n2Itr)) return true; - } - else if (n1Degree < n2Degree) { + if (s->getSolverDegree(n1Itr) < s->getSolverDegree(n2Itr)) return false; - } - // else they're "equal" by degree, differentiate based on ID. - return g->getNodeID(node1Itr) < g->getNodeID(node2Itr); + return (&*n1Itr < &*n2Itr); } - private: - SolverGraph *g; - }; + HeuristicSolverImpl<Briggs> *s; + }; - class SpillPriorityComparator { + class SpillCostComparator { public: - SpillPriorityComparator() : g(0) {} - SpillPriorityComparator(SolverGraph *g) : g(g) {} - - bool operator()(const GraphNodeIterator &node1Itr, - const GraphNodeIterator &node2Itr) const { - assert((g != 0) && "Graph object not set, cannot access node data."); - PBQPNum cost1 = - g->getNodeCosts(node1Itr)[0] / - g->getNodeData(node1Itr).getLinkDegree(), - cost2 = - g->getNodeCosts(node2Itr)[0] / - g->getNodeData(node2Itr).getLinkDegree(); - - if (cost1 < cost2) { + SpillCostComparator(HeuristicSolverImpl<Briggs> &s) + : s(&s), g(&s.getGraph()) {} + bool operator()(Graph::NodeItr n1Itr, Graph::NodeItr n2Itr) const { + PBQPNum cost1 = g->getNodeCosts(n1Itr)[0] / s->getSolverDegree(n1Itr), + cost2 = g->getNodeCosts(n2Itr)[0] / s->getSolverDegree(n2Itr); + if (cost1 < cost2) return true; - } - else if (cost1 > cost2) { + if (cost1 > cost2) return false; - } - // else they'er "equal" again, differentiate based on address again. - return g->getNodeID(node1Itr) < g->getNodeID(node2Itr); + return (&*n1Itr < &*n2Itr); } private: - SolverGraph *g; - }; - - typedef std::set<GraphNodeIterator, LinkDegreeComparator> - RNAllocableNodeList; - typedef RNAllocableNodeList::iterator RNAllocableNodeListIterator; - - typedef std::set<GraphNodeIterator, SpillPriorityComparator> - RNUnallocableNodeList; - typedef RNUnallocableNodeList::iterator RNUnallocableNodeListIterator; - - public: - - class NodeData { - private: - RNAllocableNodeListIterator rNAllocableNodeListItr; - RNUnallocableNodeListIterator rNUnallocableNodeListItr; - unsigned numRegOptions, numDenied, numSafe; - std::vector<unsigned> unsafeDegrees; - bool allocable; - - void addRemoveLink(SolverGraph &g, const GraphNodeIterator &nodeItr, - const GraphEdgeIterator &edgeItr, bool add) { + HeuristicSolverImpl<Briggs> *s; + Graph *g; + }; - //assume we're adding... 
- unsigned udTarget = 0, dir = 1; + typedef std::list<Graph::NodeItr> RNAllocableList; + typedef RNAllocableList::iterator RNAllocableListItr; - if (!add) { - udTarget = 1; - dir = ~0; - } + typedef std::list<Graph::NodeItr> RNUnallocableList; + typedef RNUnallocableList::iterator RNUnallocableListItr; - EdgeData &linkEdgeData = g.getEdgeData(edgeItr).getHeuristicData(); + public: - EdgeData::ConstUnsafeIterator edgeUnsafeBegin, edgeUnsafeEnd; + struct NodeData { + typedef std::vector<unsigned> UnsafeDegreesArray; + bool isHeuristic, isAllocable, isInitialized; + unsigned numDenied, numSafe; + UnsafeDegreesArray unsafeDegrees; + RNAllocableListItr rnaItr; + RNUnallocableListItr rnuItr; - if (nodeItr == g.getEdgeNode1Itr(edgeItr)) { - numDenied += (dir * linkEdgeData.getWorstDegree()); - edgeUnsafeBegin = linkEdgeData.unsafeBegin(); - edgeUnsafeEnd = linkEdgeData.unsafeEnd(); - } - else { - numDenied += (dir * linkEdgeData.getReverseWorstDegree()); - edgeUnsafeBegin = linkEdgeData.reverseUnsafeBegin(); - edgeUnsafeEnd = linkEdgeData.reverseUnsafeEnd(); - } + NodeData() + : isHeuristic(false), isAllocable(false), isInitialized(false), + numDenied(0), numSafe(0) { } + }; - assert((unsafeDegrees.size() == - static_cast<unsigned>( - std::distance(edgeUnsafeBegin, edgeUnsafeEnd))) - && "Unsafe array size mismatch."); - - std::vector<unsigned>::iterator unsafeDegreesItr = - unsafeDegrees.begin(); - - for (EdgeData::ConstUnsafeIterator edgeUnsafeItr = edgeUnsafeBegin; - edgeUnsafeItr != edgeUnsafeEnd; - ++edgeUnsafeItr, ++unsafeDegreesItr) { - - if ((*edgeUnsafeItr == 1) && (*unsafeDegreesItr == udTarget)) { - numSafe -= dir; - } - *unsafeDegreesItr += (dir * (*edgeUnsafeItr)); - } - - allocable = (numDenied < numRegOptions) || (numSafe > 0); - } - - public: - - void setup(SolverGraph &g, const GraphNodeIterator &nodeItr) { - - numRegOptions = g.getNodeCosts(nodeItr).getLength() - 1; - - numSafe = numRegOptions; // Optimistic, correct below. - numDenied = 0; // Also optimistic. - unsafeDegrees.resize(numRegOptions, 0); - - HSIT::NodeData &nodeData = g.getNodeData(nodeItr); - - for (HSIT::NodeData::AdjLinkIterator - adjLinkItr = nodeData.adjLinksBegin(), - adjLinkEnd = nodeData.adjLinksEnd(); - adjLinkItr != adjLinkEnd; ++adjLinkItr) { - - addRemoveLink(g, nodeItr, *adjLinkItr, true); - } - } - - bool isAllocable() const { return allocable; } - - void handleAddLink(SolverGraph &g, const GraphNodeIterator &nodeItr, - const GraphEdgeIterator &adjEdge) { - addRemoveLink(g, nodeItr, adjEdge, true); + struct EdgeData { + typedef std::vector<unsigned> UnsafeArray; + unsigned worst, reverseWorst; + UnsafeArray unsafe, reverseUnsafe; + bool isUpToDate; + + EdgeData() : worst(0), reverseWorst(0), isUpToDate(false) {} + }; + + /// \brief Construct an instance of the Briggs heuristic. + /// @param solver A reference to the solver which is using this heuristic. + Briggs(HeuristicSolverImpl<Briggs> &solver) : + HeuristicBase<Briggs>(solver) {} + + /// \brief Determine whether a node should be reduced using optimal + /// reduction. + /// @param nItr Node iterator to be considered. + /// @return True if the given node should be optimally reduced, false + /// otherwise. + /// + /// Selects nodes of degree 0, 1 or 2 for optimal reduction, with one + /// exception. Nodes whose spill cost (element 0 of their cost vector) is + /// infinite are checked for allocability first. Allocable nodes may be + /// optimally reduced, but nodes whose allocability cannot be proven are + /// selected for heuristic reduction instead. 
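+      ///
+      /// A worked example (illustrative): a node with two incident solver
+      /// edges has solver degree 2 and, per the degree test in the body
+      /// below, is sent down the optimal reduction path; once a third edge
+      /// is attached it is handed to addToHeuristicReduceList instead.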
+ bool shouldOptimallyReduce(Graph::NodeItr nItr) { + if (getSolver().getSolverDegree(nItr) < 3) { + return true; } - - void handleRemoveLink(SolverGraph &g, const GraphNodeIterator &nodeItr, - const GraphEdgeIterator &adjEdge) { - addRemoveLink(g, nodeItr, adjEdge, false); + // else + return false; + } + + /// \brief Add a node to the heuristic reduce list. + /// @param nItr Node iterator to add to the heuristic reduce list. + void addToHeuristicReduceList(Graph::NodeItr nItr) { + NodeData &nd = getHeuristicNodeData(nItr); + initializeNode(nItr); + nd.isHeuristic = true; + if (nd.isAllocable) { + nd.rnaItr = rnAllocableList.insert(rnAllocableList.end(), nItr); + } else { + nd.rnuItr = rnUnallocableList.insert(rnUnallocableList.end(), nItr); } - - void setRNAllocableNodeListItr( - const RNAllocableNodeListIterator &rNAllocableNodeListItr) { - - this->rNAllocableNodeListItr = rNAllocableNodeListItr; + } + + /// \brief Heuristically reduce one of the nodes in the heuristic + /// reduce list. + /// @return True if a reduction takes place, false if the heuristic reduce + /// list is empty. + /// + /// If the list of allocable nodes is non-empty a node is selected + /// from it and pushed to the stack. Otherwise if the non-allocable list + /// is non-empty a node is selected from it and pushed to the stack. + /// If both lists are empty the method simply returns false with no action + /// taken. + bool heuristicReduce() { + if (!rnAllocableList.empty()) { + RNAllocableListItr rnaItr = + min_element(rnAllocableList.begin(), rnAllocableList.end(), + LinkDegreeComparator(getSolver())); + Graph::NodeItr nItr = *rnaItr; + rnAllocableList.erase(rnaItr); + handleRemoveNode(nItr); + getSolver().pushToStack(nItr); + return true; + } else if (!rnUnallocableList.empty()) { + RNUnallocableListItr rnuItr = + min_element(rnUnallocableList.begin(), rnUnallocableList.end(), + SpillCostComparator(getSolver())); + Graph::NodeItr nItr = *rnuItr; + rnUnallocableList.erase(rnuItr); + handleRemoveNode(nItr); + getSolver().pushToStack(nItr); + return true; } - - RNAllocableNodeListIterator getRNAllocableNodeListItr() const { - return rNAllocableNodeListItr; + // else + return false; + } + + /// \brief Prepare a change in the costs on the given edge. + /// @param eItr Edge iterator. + void preUpdateEdgeCosts(Graph::EdgeItr eItr) { + Graph &g = getGraph(); + Graph::NodeItr n1Itr = g.getEdgeNode1(eItr), + n2Itr = g.getEdgeNode2(eItr); + NodeData &n1 = getHeuristicNodeData(n1Itr), + &n2 = getHeuristicNodeData(n2Itr); + + if (n1.isHeuristic) + subtractEdgeContributions(eItr, getGraph().getEdgeNode1(eItr)); + if (n2.isHeuristic) + subtractEdgeContributions(eItr, getGraph().getEdgeNode2(eItr)); + + EdgeData &ed = getHeuristicEdgeData(eItr); + ed.isUpToDate = false; + } + + /// \brief Handle the change in the costs on the given edge. + /// @param eItr Edge iterator. + void postUpdateEdgeCosts(Graph::EdgeItr eItr) { + // This is effectively the same as adding a new edge now, since + // we've factored out the costs of the old one. + handleAddEdge(eItr); + } + + /// \brief Handle the addition of a new edge into the PBQP graph. + /// @param eItr Edge iterator for the added edge. + /// + /// Updates allocability of any nodes connected by this edge which are + /// being managed by the heuristic. If allocability changes they are + /// moved to the appropriate list. 
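+      ///
+      /// For example (illustrative): if n1 sat on rnAllocableList and this
+      /// edge's contributions raise numDenied to numRegs while numSafe drops
+      /// to 0, updateAllocability clears isAllocable and n1 is moved to
+      /// rnUnallocableList, exactly as the body below does.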
+ void handleAddEdge(Graph::EdgeItr eItr) { + Graph &g = getGraph(); + Graph::NodeItr n1Itr = g.getEdgeNode1(eItr), + n2Itr = g.getEdgeNode2(eItr); + NodeData &n1 = getHeuristicNodeData(n1Itr), + &n2 = getHeuristicNodeData(n2Itr); + + // If neither node is managed by the heuristic there's nothing to be + // done. + if (!n1.isHeuristic && !n2.isHeuristic) + return; + + // Ok - we need to update at least one node. + computeEdgeContributions(eItr); + + // Update node 1 if it's managed by the heuristic. + if (n1.isHeuristic) { + bool n1WasAllocable = n1.isAllocable; + addEdgeContributions(eItr, n1Itr); + updateAllocability(n1Itr); + if (n1WasAllocable && !n1.isAllocable) { + rnAllocableList.erase(n1.rnaItr); + n1.rnuItr = + rnUnallocableList.insert(rnUnallocableList.end(), n1Itr); + } } - void setRNUnallocableNodeListItr( - const RNUnallocableNodeListIterator &rNUnallocableNodeListItr) { - - this->rNUnallocableNodeListItr = rNUnallocableNodeListItr; + // Likewise for node 2. + if (n2.isHeuristic) { + bool n2WasAllocable = n2.isAllocable; + addEdgeContributions(eItr, n2Itr); + updateAllocability(n2Itr); + if (n2WasAllocable && !n2.isAllocable) { + rnAllocableList.erase(n2.rnaItr); + n2.rnuItr = + rnUnallocableList.insert(rnUnallocableList.end(), n2Itr); + } } - - RNUnallocableNodeListIterator getRNUnallocableNodeListItr() const { - return rNUnallocableNodeListItr; + } + + /// \brief Handle disconnection of an edge from a node. + /// @param eItr Edge iterator for edge being disconnected. + /// @param nItr Node iterator for the node being disconnected from. + /// + /// Updates allocability of the given node and, if appropriate, moves the + /// node to a new list. + void handleRemoveEdge(Graph::EdgeItr eItr, Graph::NodeItr nItr) { + NodeData &nd = getHeuristicNodeData(nItr); + + // If the node is not managed by the heuristic there's nothing to be + // done. + if (!nd.isHeuristic) + return; + + EdgeData &ed ATTRIBUTE_UNUSED = getHeuristicEdgeData(eItr); + + assert(ed.isUpToDate && "Edge data is not up to date."); + + // Update node. + bool ndWasAllocable = nd.isAllocable; + subtractEdgeContributions(eItr, nItr); + updateAllocability(nItr); + + // If the node has gone optimal... + if (shouldOptimallyReduce(nItr)) { + nd.isHeuristic = false; + addToOptimalReduceList(nItr); + if (ndWasAllocable) { + rnAllocableList.erase(nd.rnaItr); + } else { + rnUnallocableList.erase(nd.rnuItr); + } + } else { + // Node didn't go optimal, but we might have to move it + // from "unallocable" to "allocable". + if (!ndWasAllocable && nd.isAllocable) { + rnUnallocableList.erase(nd.rnuItr); + nd.rnaItr = rnAllocableList.insert(rnAllocableList.end(), nItr); + } } + } + private: - }; + NodeData& getHeuristicNodeData(Graph::NodeItr nItr) { + return getSolver().getHeuristicNodeData(nItr); + } - class EdgeData { - private: + EdgeData& getHeuristicEdgeData(Graph::EdgeItr eItr) { + return getSolver().getHeuristicEdgeData(eItr); + } - typedef std::vector<unsigned> UnsafeArray; + // Work out what this edge will contribute to the allocability of the + // nodes connected to it. + void computeEdgeContributions(Graph::EdgeItr eItr) { + EdgeData &ed = getHeuristicEdgeData(eItr); - unsigned worstDegree, - reverseWorstDegree; - UnsafeArray unsafe, reverseUnsafe; + if (ed.isUpToDate) + return; // Edge data is already up to date. 
- public: + Matrix &eCosts = getGraph().getEdgeCosts(eItr); - EdgeData() : worstDegree(0), reverseWorstDegree(0) {} + unsigned numRegs = eCosts.getRows() - 1, + numReverseRegs = eCosts.getCols() - 1; - typedef UnsafeArray::const_iterator ConstUnsafeIterator; + std::vector<unsigned> rowInfCounts(numRegs, 0), + colInfCounts(numReverseRegs, 0); - void setup(SolverGraph &g, const GraphEdgeIterator &edgeItr) { - const Matrix &edgeCosts = g.getEdgeCosts(edgeItr); - unsigned numRegs = edgeCosts.getRows() - 1, - numReverseRegs = edgeCosts.getCols() - 1; + ed.worst = 0; + ed.reverseWorst = 0; + ed.unsafe.clear(); + ed.unsafe.resize(numRegs, 0); + ed.reverseUnsafe.clear(); + ed.reverseUnsafe.resize(numReverseRegs, 0); - unsafe.resize(numRegs, 0); - reverseUnsafe.resize(numReverseRegs, 0); + for (unsigned i = 0; i < numRegs; ++i) { + for (unsigned j = 0; j < numReverseRegs; ++j) { + if (eCosts[i + 1][j + 1] == + std::numeric_limits<PBQPNum>::infinity()) { + ed.unsafe[i] = 1; + ed.reverseUnsafe[j] = 1; + ++rowInfCounts[i]; + ++colInfCounts[j]; - std::vector<unsigned> rowInfCounts(numRegs, 0), - colInfCounts(numReverseRegs, 0); + if (colInfCounts[j] > ed.worst) { + ed.worst = colInfCounts[j]; + } - for (unsigned i = 0; i < numRegs; ++i) { - for (unsigned j = 0; j < numReverseRegs; ++j) { - if (edgeCosts[i + 1][j + 1] == - std::numeric_limits<PBQPNum>::infinity()) { - unsafe[i] = 1; - reverseUnsafe[j] = 1; - ++rowInfCounts[i]; - ++colInfCounts[j]; - - if (colInfCounts[j] > worstDegree) { - worstDegree = colInfCounts[j]; - } - - if (rowInfCounts[i] > reverseWorstDegree) { - reverseWorstDegree = rowInfCounts[i]; - } + if (rowInfCounts[i] > ed.reverseWorst) { + ed.reverseWorst = rowInfCounts[i]; } } } } - unsigned getWorstDegree() const { return worstDegree; } - unsigned getReverseWorstDegree() const { return reverseWorstDegree; } - ConstUnsafeIterator unsafeBegin() const { return unsafe.begin(); } - ConstUnsafeIterator unsafeEnd() const { return unsafe.end(); } - ConstUnsafeIterator reverseUnsafeBegin() const { - return reverseUnsafe.begin(); + ed.isUpToDate = true; + } + + // Add the contributions of the given edge to the given node's + // numDenied and safe members. No action is taken other than to update + // these member values. Once updated these numbers can be used by clients + // to update the node's allocability. + void addEdgeContributions(Graph::EdgeItr eItr, Graph::NodeItr nItr) { + EdgeData &ed = getHeuristicEdgeData(eItr); + + assert(ed.isUpToDate && "Using out-of-date edge numbers."); + + NodeData &nd = getHeuristicNodeData(nItr); + unsigned numRegs = getGraph().getNodeCosts(nItr).getLength() - 1; + + bool nIsNode1 = nItr == getGraph().getEdgeNode1(eItr); + EdgeData::UnsafeArray &unsafe = + nIsNode1 ? ed.unsafe : ed.reverseUnsafe; + nd.numDenied += nIsNode1 ? ed.worst : ed.reverseWorst; + + for (unsigned r = 0; r < numRegs; ++r) { + if (unsafe[r]) { + if (nd.unsafeDegrees[r]==0) { + --nd.numSafe; + } + ++nd.unsafeDegrees[r]; + } } - ConstUnsafeIterator reverseUnsafeEnd() const { - return reverseUnsafe.end(); + } + + // Subtract the contributions of the given edge to the given node's + // numDenied and safe members. No action is taken other than to update + // these member values. Once updated these numbers can be used by clients + // to update the node's allocability. 
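+      // Example (illustrative): if register option r was made unsafe only by
+      // this edge (unsafeDegrees[r] == 1), subtracting the edge's
+      // contribution increments numSafe before unsafeDegrees[r] drops back
+      // to 0.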
+ void subtractEdgeContributions(Graph::EdgeItr eItr, Graph::NodeItr nItr) { + EdgeData &ed = getHeuristicEdgeData(eItr); + + assert(ed.isUpToDate && "Using out-of-date edge numbers."); + + NodeData &nd = getHeuristicNodeData(nItr); + unsigned numRegs = getGraph().getNodeCosts(nItr).getLength() - 1; + + bool nIsNode1 = nItr == getGraph().getEdgeNode1(eItr); + EdgeData::UnsafeArray &unsafe = + nIsNode1 ? ed.unsafe : ed.reverseUnsafe; + nd.numDenied -= nIsNode1 ? ed.worst : ed.reverseWorst; + + for (unsigned r = 0; r < numRegs; ++r) { + if (unsafe[r]) { + if (nd.unsafeDegrees[r] == 1) { + ++nd.numSafe; + } + --nd.unsafeDegrees[r]; + } } - }; - - void initialise(Solver &solver) { - this->s = &solver; - g = &s->getGraph(); - rNAllocableBucket = RNAllocableNodeList(LinkDegreeComparator(g)); - rNUnallocableBucket = - RNUnallocableNodeList(SpillPriorityComparator(g)); - - for (GraphEdgeIterator - edgeItr = g->edgesBegin(), edgeEnd = g->edgesEnd(); - edgeItr != edgeEnd; ++edgeItr) { - - g->getEdgeData(edgeItr).getHeuristicData().setup(*g, edgeItr); - } - - for (GraphNodeIterator - nodeItr = g->nodesBegin(), nodeEnd = g->nodesEnd(); - nodeItr != nodeEnd; ++nodeItr) { - - g->getNodeData(nodeItr).getHeuristicData().setup(*g, nodeItr); - } - } - - void addToRNBucket(const GraphNodeIterator &nodeItr) { - NodeData &nodeData = g->getNodeData(nodeItr).getHeuristicData(); - - if (nodeData.isAllocable()) { - nodeData.setRNAllocableNodeListItr( - rNAllocableBucket.insert(rNAllocableBucket.begin(), nodeItr)); - } - else { - nodeData.setRNUnallocableNodeListItr( - rNUnallocableBucket.insert(rNUnallocableBucket.begin(), nodeItr)); - } - } + } - void removeFromRNBucket(const GraphNodeIterator &nodeItr) { - NodeData &nodeData = g->getNodeData(nodeItr).getHeuristicData(); - - if (nodeData.isAllocable()) { - rNAllocableBucket.erase(nodeData.getRNAllocableNodeListItr()); - } - else { - rNUnallocableBucket.erase(nodeData.getRNUnallocableNodeListItr()); - } - } + void updateAllocability(Graph::NodeItr nItr) { + NodeData &nd = getHeuristicNodeData(nItr); + unsigned numRegs = getGraph().getNodeCosts(nItr).getLength() - 1; + nd.isAllocable = nd.numDenied < numRegs || nd.numSafe > 0; + } - void handleAddLink(const GraphEdgeIterator &edgeItr) { - // We assume that if we got here this edge is attached to at least - // one high degree node. - g->getEdgeData(edgeItr).getHeuristicData().setup(*g, edgeItr); - - GraphNodeIterator n1Itr = g->getEdgeNode1Itr(edgeItr), - n2Itr = g->getEdgeNode2Itr(edgeItr); - - HSIT::NodeData &n1Data = g->getNodeData(n1Itr), - &n2Data = g->getNodeData(n2Itr); - - if (n1Data.getLinkDegree() > 2) { - n1Data.getHeuristicData().handleAddLink(*g, n1Itr, edgeItr); - } - if (n2Data.getLinkDegree() > 2) { - n2Data.getHeuristicData().handleAddLink(*g, n2Itr, edgeItr); - } - } + void initializeNode(Graph::NodeItr nItr) { + NodeData &nd = getHeuristicNodeData(nItr); - void handleRemoveLink(const GraphEdgeIterator &edgeItr, - const GraphNodeIterator &nodeItr) { - NodeData &nodeData = g->getNodeData(nodeItr).getHeuristicData(); - nodeData.handleRemoveLink(*g, nodeItr, edgeItr); - } + if (nd.isInitialized) + return; // Node data is already up to date. 
- void processRN() { - - if (!rNAllocableBucket.empty()) { - GraphNodeIterator selectedNodeItr = *rNAllocableBucket.begin(); - //std::cerr << "RN safely pushing " << g->getNodeID(selectedNodeItr) << "\n"; - rNAllocableBucket.erase(rNAllocableBucket.begin()); - s->pushStack(selectedNodeItr); - s->unlinkNode(selectedNodeItr); - } - else { - GraphNodeIterator selectedNodeItr = *rNUnallocableBucket.begin(); - //std::cerr << "RN optimistically pushing " << g->getNodeID(selectedNodeItr) << "\n"; - rNUnallocableBucket.erase(rNUnallocableBucket.begin()); - s->pushStack(selectedNodeItr); - s->unlinkNode(selectedNodeItr); - } - - } + unsigned numRegs = getGraph().getNodeCosts(nItr).getLength() - 1; - bool rNBucketEmpty() const { - return (rNAllocableBucket.empty() && rNUnallocableBucket.empty()); - } + nd.numDenied = 0; + nd.numSafe = numRegs; + nd.unsafeDegrees.resize(numRegs, 0); -private: + typedef HeuristicSolverImpl<Briggs>::SolverEdgeItr SolverEdgeItr; - Solver *s; - SolverGraph *g; - RNAllocableNodeList rNAllocableBucket; - RNUnallocableNodeList rNUnallocableBucket; -}; + for (SolverEdgeItr aeItr = getSolver().solverEdgesBegin(nItr), + aeEnd = getSolver().solverEdgesEnd(nItr); + aeItr != aeEnd; ++aeItr) { + + Graph::EdgeItr eItr = *aeItr; + computeEdgeContributions(eItr); + addEdgeContributions(eItr, nItr); + } + updateAllocability(nItr); + nd.isInitialized = true; + } + + void handleRemoveNode(Graph::NodeItr xnItr) { + typedef HeuristicSolverImpl<Briggs>::SolverEdgeItr SolverEdgeItr; + std::vector<Graph::EdgeItr> edgesToRemove; + for (SolverEdgeItr aeItr = getSolver().solverEdgesBegin(xnItr), + aeEnd = getSolver().solverEdgesEnd(xnItr); + aeItr != aeEnd; ++aeItr) { + Graph::NodeItr ynItr = getGraph().getEdgeOtherNode(*aeItr, xnItr); + handleRemoveEdge(*aeItr, ynItr); + edgesToRemove.push_back(*aeItr); + } + while (!edgesToRemove.empty()) { + getSolver().removeSolverEdge(edgesToRemove.back()); + edgesToRemove.pop_back(); + } + } + RNAllocableList rnAllocableList; + RNUnallocableList rnUnallocableList; + }; -} + } } diff --git a/lib/CodeGen/PBQP/PBQPMath.h b/lib/CodeGen/PBQP/Math.h index 20737a2..e7598bf 100644 --- a/lib/CodeGen/PBQP/PBQPMath.h +++ b/lib/CodeGen/PBQP/Math.h @@ -1,4 +1,4 @@ -//===-- PBQPMath.h - PBQP Vector and Matrix classes -------------*- C++ -*-===// +//===------ Math.h - PBQP Vector and Matrix classes -------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_PBQP_PBQPMATH_H -#define LLVM_CODEGEN_PBQP_PBQPMATH_H +#ifndef LLVM_CODEGEN_PBQP_MATH_H +#define LLVM_CODEGEN_PBQP_MATH_H #include <cassert> #include <algorithm> @@ -16,7 +16,7 @@ namespace PBQP { -typedef double PBQPNum; +typedef float PBQPNum; /// \brief PBQP Vector class. class Vector { @@ -285,4 +285,4 @@ OStream& operator<<(OStream &os, const Matrix &m) { } -#endif // LLVM_CODEGEN_PBQP_PBQPMATH_HPP +#endif // LLVM_CODEGEN_PBQP_MATH_H diff --git a/lib/CodeGen/PBQP/SimpleGraph.h b/lib/CodeGen/PBQP/SimpleGraph.h deleted file mode 100644 index 13e63ce..0000000 --- a/lib/CodeGen/PBQP/SimpleGraph.h +++ /dev/null @@ -1,100 +0,0 @@ -//===-- SimpleGraph.h - Simple PBQP Graph -----------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// Simple PBQP graph class representing a PBQP problem. Graphs of this type -// can be passed to a PBQPSolver instance to solve the PBQP problem. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_PBQP_SIMPLEGRAPH_H -#define LLVM_CODEGEN_PBQP_SIMPLEGRAPH_H - -#include "GraphBase.h" - -namespace PBQP { - -class SimpleEdge; - -class SimpleNode : public NodeBase<SimpleNode, SimpleEdge> { -public: - SimpleNode(const Vector &costs) : - NodeBase<SimpleNode, SimpleEdge>(costs) {} -}; - -class SimpleEdge : public EdgeBase<SimpleNode, SimpleEdge> { -public: - SimpleEdge(const NodeIterator &node1Itr, const NodeIterator &node2Itr, - const Matrix &costs) : - EdgeBase<SimpleNode, SimpleEdge>(node1Itr, node2Itr, costs) {} -}; - -class SimpleGraph : public GraphBase<SimpleNode, SimpleEdge> { -private: - - typedef GraphBase<SimpleNode, SimpleEdge> PGraph; - - void copyFrom(const SimpleGraph &other) { - assert(other.areNodeIDsValid() && - "Cannot copy from another graph unless IDs have been assigned."); - - std::vector<NodeIterator> newNodeItrs(other.getNumNodes()); - - for (ConstNodeIterator nItr = other.nodesBegin(), nEnd = other.nodesEnd(); - nItr != nEnd; ++nItr) { - newNodeItrs[other.getNodeID(nItr)] = addNode(other.getNodeCosts(nItr)); - } - - for (ConstEdgeIterator eItr = other.edgesBegin(), eEnd = other.edgesEnd(); - eItr != eEnd; ++eItr) { - - unsigned node1ID = other.getNodeID(other.getEdgeNode1Itr(eItr)), - node2ID = other.getNodeID(other.getEdgeNode2Itr(eItr)); - - addEdge(newNodeItrs[node1ID], newNodeItrs[node2ID], - other.getEdgeCosts(eItr)); - } - } - - void copyFrom(SimpleGraph &other) { - if (!other.areNodeIDsValid()) { - other.assignNodeIDs(); - } - copyFrom(const_cast<const SimpleGraph&>(other)); - } - -public: - - SimpleGraph() {} - - - SimpleGraph(const SimpleGraph &other) : PGraph() { - copyFrom(other); - } - - SimpleGraph& operator=(const SimpleGraph &other) { - clear(); - copyFrom(other); - return *this; - } - - NodeIterator addNode(const Vector &costs) { - return PGraph::addConstructedNode(SimpleNode(costs)); - } - - EdgeIterator addEdge(const NodeIterator &node1Itr, - const NodeIterator &node2Itr, - const Matrix &costs) { - return PGraph::addConstructedEdge(SimpleEdge(node1Itr, node2Itr, costs)); - } - -}; - -} - -#endif // LLVM_CODEGEN_PBQP_SIMPLEGRAPH_H diff --git a/lib/CodeGen/PBQP/Solution.h b/lib/CodeGen/PBQP/Solution.h index aee684d..294b537 100644 --- a/lib/CodeGen/PBQP/Solution.h +++ b/lib/CodeGen/PBQP/Solution.h @@ -7,81 +7,51 @@ // //===----------------------------------------------------------------------===// // -// Annotated PBQP Graph class. This class is used internally by the PBQP solver -// to cache information to speed up reduction. +// PBQP Solution class. 
 //
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_CODEGEN_PBQP_SOLUTION_H
 #define LLVM_CODEGEN_PBQP_SOLUTION_H
 
-#include "PBQPMath.h"
+#include "Math.h"
+#include "Graph.h"
 
-namespace PBQP {
-
-class Solution {
-
-  friend class SolverImplementation;
-
-private:
-
-  std::vector<unsigned> selections;
-  PBQPNum solutionCost;
-  bool provedOptimal;
-  unsigned r0Reductions, r1Reductions,
-           r2Reductions, rNReductions;
-
-public:
-
-  Solution() :
-    solutionCost(0.0), provedOptimal(false),
-    r0Reductions(0), r1Reductions(0), r2Reductions(0), rNReductions(0) {}
-
-  Solution(unsigned length, bool assumeOptimal) :
-    selections(length), solutionCost(0.0), provedOptimal(assumeOptimal),
-    r0Reductions(0), r1Reductions(0), r2Reductions(0), rNReductions(0) {}
-
-  void setProvedOptimal(bool provedOptimal) {
-    this->provedOptimal = provedOptimal;
-  }
-
-  void setSelection(unsigned nodeID, unsigned selection) {
-    selections[nodeID] = selection;
-  }
+#include <map>
 
-  void setSolutionCost(PBQPNum solutionCost) {
-    this->solutionCost = solutionCost;
-  }
-
-  void incR0Reductions() { ++r0Reductions; }
-  void incR1Reductions() { ++r1Reductions; }
-  void incR2Reductions() { ++r2Reductions; }
-  void incRNReductions() { ++rNReductions; }
-
-  unsigned numNodes() const { return selections.size(); }
-
-  unsigned getSelection(unsigned nodeID) const {
-    return selections[nodeID];
-  }
-
-  PBQPNum getCost() const { return solutionCost; }
-
-  bool isProvedOptimal() const { return provedOptimal; }
-
-  unsigned getR0Reductions() const { return r0Reductions; }
-  unsigned getR1Reductions() const { return r1Reductions; }
-  unsigned getR2Reductions() const { return r2Reductions; }
-  unsigned getRNReductions() const { return rNReductions; }
-
-  bool operator==(const Solution &other) const {
-    return (selections == other.selections);
-  }
-
-  bool operator!=(const Solution &other) const {
-    return !(*this == other);
-  }
+namespace PBQP {
-};
+  /// \brief Represents a solution to a PBQP problem.
+  ///
+  /// To get the selection for each node in the problem, use the getSelection method.
+  class Solution {
+  private:
+    typedef std::map<Graph::NodeItr, unsigned, NodeItrComparator> SelectionsMap;
+    SelectionsMap selections;
+
+  public:
+
+    /// \brief Number of nodes for which selections have been made.
+    /// @return Number of nodes for which selections have been made.
+    unsigned numNodes() const { return selections.size(); }
+
+    /// \brief Set the selection for a given node.
+    /// @param nItr Node iterator.
+    /// @param selection Selection for nItr.
+    void setSelection(Graph::NodeItr nItr, unsigned selection) {
+      selections[nItr] = selection;
+    }
+
+    /// \brief Get a node's selection.
+    /// @param nItr Node iterator.
+    /// @return The selection for nItr.
+    unsigned getSelection(Graph::NodeItr nItr) const {
+      SelectionsMap::const_iterator sItr = selections.find(nItr);
+      assert(sItr != selections.end() && "No selection for node.");
+      return sItr->second;
+    }
+
+  };
 
 }
diff --git a/lib/CodeGen/PBQP/Solver.h b/lib/CodeGen/PBQP/Solver.h
deleted file mode 100644
index a445de8..0000000
--- a/lib/CodeGen/PBQP/Solver.h
+++ /dev/null
@@ -1,31 +0,0 @@
-//===-- Solver.h ------- PBQP solver interface ------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-// -//===----------------------------------------------------------------------===// - - -#ifndef LLVM_CODEGEN_PBQP_SOLVER_H -#define LLVM_CODEGEN_PBQP_SOLVER_H - -#include "SimpleGraph.h" -#include "Solution.h" - -namespace PBQP { - -/// \brief Interface for solver classes. -class Solver { -public: - - virtual ~Solver() = 0; - virtual Solution solve(const SimpleGraph &orig) const = 0; -}; - -Solver::~Solver() {} - -} - -#endif // LLVM_CODEGEN_PBQP_SOLVER_H diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index 365df30..b740c68 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Function.h" #include "llvm/Target/TargetMachine.h" #include "llvm/ADT/SmallPtrSet.h" @@ -95,14 +96,14 @@ bool llvm::PHIElimination::runOnMachineFunction(MachineFunction &Fn) { /// bool llvm::PHIElimination::EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB) { - if (MBB.empty() || MBB.front().getOpcode() != TargetInstrInfo::PHI) + if (MBB.empty() || !MBB.front().isPHI()) return false; // Quick exit for basic blocks without PHIs. // Get an iterator to the first instruction after the last PHI node (this may // also be the end of the basic block). MachineBasicBlock::iterator AfterPHIsIt = SkipPHIsAndLabels(MBB, MBB.begin()); - while (MBB.front().getOpcode() == TargetInstrInfo::PHI) + while (MBB.front().isPHI()) LowerAtomicPHINode(MBB, AfterPHIsIt); return true; @@ -115,7 +116,7 @@ static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi, for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) { unsigned SrcReg = MPhi->getOperand(i).getReg(); const MachineInstr *DefMI = MRI->getVRegDef(SrcReg); - if (!DefMI || DefMI->getOpcode() != TargetInstrInfo::IMPLICIT_DEF) + if (!DefMI || !DefMI->isImplicitDef()) return false; } return true; @@ -197,7 +198,7 @@ void llvm::PHIElimination::LowerAtomicPHINode( // If all sources of a PHI node are implicit_def, just emit an // implicit_def instead of a copy. BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(), - TII->get(TargetInstrInfo::IMPLICIT_DEF), DestReg); + TII->get(TargetOpcode::IMPLICIT_DEF), DestReg); else { // Can we reuse an earlier PHI node? This only happens for critical edges, // typically those created by tail duplication. @@ -281,7 +282,7 @@ void llvm::PHIElimination::LowerAtomicPHINode( // If source is defined by an implicit def, there is no need to insert a // copy. 
MachineInstr *DefMI = MRI->getVRegDef(SrcReg); - if (DefMI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) { + if (DefMI->isImplicitDef()) { ImpDefs.insert(DefMI); continue; } @@ -375,7 +376,7 @@ void llvm::PHIElimination::analyzePHINodes(const MachineFunction& Fn) { for (MachineFunction::const_iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end(); - BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI) + BBI != BBE && BBI->isPHI(); ++BBI) for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) ++VRegPHIUseCount[BBVRegPair(BBI->getOperand(i+1).getMBB()->getNumber(), BBI->getOperand(i).getReg())]; @@ -384,12 +385,11 @@ void llvm::PHIElimination::analyzePHINodes(const MachineFunction& Fn) { bool llvm::PHIElimination::SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB, LiveVariables &LV) { - if (MBB.empty() || MBB.front().getOpcode() != TargetInstrInfo::PHI || - MBB.isLandingPad()) + if (MBB.empty() || !MBB.front().isPHI() || MBB.isLandingPad()) return false; // Quick exit for basic blocks without PHIs. for (MachineBasicBlock::const_iterator BBI = MBB.begin(), BBE = MBB.end(); - BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI) { + BBI != BBE && BBI->isPHI(); ++BBI) { for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) { unsigned Reg = BBI->getOperand(i).getReg(); MachineBasicBlock *PreMBB = BBI->getOperand(i+1).getMBB(); @@ -438,7 +438,7 @@ MachineBasicBlock *PHIElimination::SplitCriticalEdge(MachineBasicBlock *A, // Fix PHI nodes in B so they refer to NMBB instead of A for (MachineBasicBlock::iterator i = B->begin(), e = B->end(); - i != e && i->getOpcode() == TargetInstrInfo::PHI; ++i) + i != e && i->isPHI(); ++i) for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2) if (i->getOperand(ni+1).getMBB() == A) i->getOperand(ni+1).setMBB(NMBB); diff --git a/lib/CodeGen/PHIElimination.h b/lib/CodeGen/PHIElimination.h index 1bcc9dc..f3ab9e2 100644 --- a/lib/CodeGen/PHIElimination.h +++ b/lib/CodeGen/PHIElimination.h @@ -14,10 +14,11 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" namespace llvm { - + class LiveVariables; + /// Lower PHI instructions to copies. class PHIElimination : public MachineFunctionPass { MachineRegisterInfo *MRI; // Machine register information @@ -108,13 +109,29 @@ namespace llvm { // SkipPHIsAndLabels - Copies need to be inserted after phi nodes and // also after any exception handling labels: in landing pads execution // starts at the label, so any copies placed before it won't be executed! + // We also deal with DBG_VALUEs, which are a bit tricky: + // PHI + // DBG_VALUE + // LABEL + // Here the DBG_VALUE needs to be skipped, and if it refers to a PHI it + // needs to be annulled or, better, moved to follow the label, as well. + // PHI + // DBG_VALUE + // no label + // Here it is not a good idea to skip the DBG_VALUE. + // FIXME: For now we skip and annul all DBG_VALUEs, maximally simple and + // maximally stupid. MachineBasicBlock::iterator SkipPHIsAndLabels(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) { // Rather than assuming that EH labels come before other kinds of labels, // just skip all labels. 
- while (I != MBB.end() && - (I->getOpcode() == TargetInstrInfo::PHI || I->isLabel())) + while (I != MBB.end() && + (I->isPHI() || I->isLabel() || I->isDebugValue())) { + if (I->isDebugValue() && I->getNumOperands()==3 && + I->getOperand(0).isReg()) + I->getOperand(0).setReg(0U); ++I; + } return I; } diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp index 8cbc8c2..70e91aa 100644 --- a/lib/CodeGen/PreAllocSplitting.cpp +++ b/lib/CodeGen/PreAllocSplitting.cpp @@ -686,8 +686,7 @@ void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) { SlotIndex DefIdx = LIs->getInstructionIndex(&*DI); DefIdx = DefIdx.getDefIndex(); - assert(DI->getOpcode() != TargetInstrInfo::PHI && - "PHI instr in code during pre-alloc splitting."); + assert(!DI->isPHI() && "PHI instr in code during pre-alloc splitting."); VNInfo* NewVN = LI->getNextValue(DefIdx, 0, true, Alloc); // If the def is a move, set the copy field. diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp index a00f450..d7179b3 100644 --- a/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/lib/CodeGen/ProcessImplicitDefs.cpp @@ -49,9 +49,9 @@ bool ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI, Reg == SrcReg) return true; - if (OpIdx == 2 && MI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) + if (OpIdx == 2 && MI->isSubregToReg()) return true; - if (OpIdx == 1 && MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) + if (OpIdx == 1 && MI->isExtractSubreg()) return true; return false; } @@ -88,7 +88,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { I != E; ) { MachineInstr *MI = &*I; ++I; - if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) { + if (MI->isImplicitDef()) { unsigned Reg = MI->getOperand(0).getReg(); ImpDefRegs.insert(Reg); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { @@ -99,7 +99,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { continue; } - if (MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG) { + if (MI->isInsertSubreg()) { MachineOperand &MO = MI->getOperand(2); if (ImpDefRegs.count(MO.getReg())) { // %reg1032<def> = INSERT_SUBREG %reg1032, undef, 2 @@ -127,7 +127,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { // Use is a copy, just turn it into an implicit_def. if (CanTurnIntoImplicitDef(MI, Reg, i, tii_)) { bool isKill = MO.isKill(); - MI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF)); + MI->setDesc(tii_->get(TargetOpcode::IMPLICIT_DEF)); for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j) MI->RemoveOperand(j); if (isKill) { @@ -187,7 +187,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { for (MachineRegisterInfo::def_iterator DI = mri_->def_begin(Reg), DE = mri_->def_end(); DI != DE; ++DI) { MachineInstr *DeadImpDef = &*DI; - if (DeadImpDef->getOpcode() != TargetInstrInfo::IMPLICIT_DEF) { + if (!DeadImpDef->isImplicitDef()) { Skip = true; break; } @@ -205,10 +205,9 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { // Process each use instruction once. 
for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(Reg), UE = mri_->use_end(); UI != UE; ++UI) { - MachineInstr *RMI = &*UI; - MachineBasicBlock *RMBB = RMI->getParent(); - if (RMBB == MBB) + if (UI.getOperand().isUndef()) continue; + MachineInstr *RMI = &*UI; if (ModInsts.insert(RMI)) RUses.push_back(RMI); } @@ -220,7 +219,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; if (tii_->isMoveInstr(*RMI, SrcReg, DstReg, SrcSubReg, DstSubReg) && Reg == SrcReg) { - RMI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF)); + RMI->setDesc(tii_->get(TargetOpcode::IMPLICIT_DEF)); bool isKill = false; SmallVector<unsigned, 4> Ops; @@ -264,8 +263,8 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { } } RUses.clear(); + ModInsts.clear(); } - ModInsts.clear(); ImpDefRegs.clear(); ImpDefMIs.clear(); } diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 709d46a..040259e 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -161,7 +161,7 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) { if (Size > MaxCallFrameSize) MaxCallFrameSize = Size; HasCalls = true; FrameSDOps.push_back(I); - } else if (I->getOpcode() == TargetInstrInfo::INLINEASM) { + } else if (I->isInlineAsm()) { // An InlineAsm might be a call; assume it is to get the stack frame // aligned correctly for calls. HasCalls = true; @@ -476,8 +476,6 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // Loop over all of the stack objects, assigning sequential addresses... MachineFrameInfo *FFI = Fn.getFrameInfo(); - unsigned MaxAlign = 1; - // Start at the beginning of the local area. // The Offset is the distance from the stack top in the direction // of stack growth -- so it's always nonnegative. @@ -517,9 +515,6 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { Offset += FFI->getObjectSize(i); unsigned Align = FFI->getObjectAlignment(i); - // If the alignment of this object is greater than that of the stack, - // then increase the stack alignment to match. - MaxAlign = std::max(MaxAlign, Align); // Adjust to alignment boundary Offset = (Offset+Align-1)/Align*Align; @@ -529,9 +524,6 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex; for (int i = MaxCSFI; i >= MinCSFI ; --i) { unsigned Align = FFI->getObjectAlignment(i); - // If the alignment of this object is greater than that of the stack, - // then increase the stack alignment to match. - MaxAlign = std::max(MaxAlign, Align); // Adjust to alignment boundary Offset = (Offset+Align-1)/Align*Align; @@ -540,6 +532,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { } } + unsigned MaxAlign = FFI->getMaxAlignment(); + // Make sure the special register scavenging spill slot is closest to the // frame pointer if a frame pointer is required. const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); @@ -605,11 +599,6 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // Update frame info to pretend that this is part of the stack... FFI->setStackSize(Offset - LocalAreaOffset); - - // Remember the required stack alignment in case targets need it to perform - // dynamic stack alignment. 
- if (MaxAlign > FFI->getMaxAlignment()) - FFI->setMaxAlignment(MaxAlign); } diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp index cbb5826..04303cf 100644 --- a/lib/CodeGen/RegAllocLocal.cpp +++ b/lib/CodeGen/RegAllocLocal.cpp @@ -490,9 +490,12 @@ MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, // If the virtual register is already available, just update the instruction // and return. if (unsigned PR = getVirt2PhysRegMapSlot(VirtReg)) { - MarkPhysRegRecentlyUsed(PR); // Already have this value available! MI->getOperand(OpNum).setReg(PR); // Assign the input register - getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum); + if (!MI->isDebugValue()) { + // Do not do these for DBG_VALUE as they can affect codegen. + MarkPhysRegRecentlyUsed(PR); // Already have this value available! + getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum); + } return MI; } @@ -531,7 +534,7 @@ MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, std::string msg; raw_string_ostream Msg(msg); Msg << "Ran out of registers during register allocation!"; - if (MI->getOpcode() == TargetInstrInfo::INLINEASM) { + if (MI->isInlineAsm()) { Msg << "\nPlease check your inline asm statement for invalid " << "constraints:\n"; MI->print(Msg, TM); @@ -544,7 +547,7 @@ MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, std::string msg; raw_string_ostream Msg(msg); Msg << "Ran out of registers during register allocation!"; - if (MI->getOpcode() == TargetInstrInfo::INLINEASM) { + if (MI->isInlineAsm()) { Msg << "\nPlease check your inline asm statement for invalid " << "constraints:\n"; MI->print(Msg, TM); @@ -609,6 +612,8 @@ void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) { DenseMap<unsigned, std::pair<MachineInstr*, unsigned> > LastUseDef; for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) { + if (I->isDebugValue()) + continue; for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { MachineOperand& MO = I->getOperand(i); // Uses don't trigger any flags, but we need to save @@ -691,7 +696,13 @@ void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) { bool usedOutsideBlock = isPhysReg ? false : UsedInMultipleBlocks.test(MO.getReg() - TargetRegisterInfo::FirstVirtualRegister); - if (!isPhysReg && !usedOutsideBlock) + if (!isPhysReg && !usedOutsideBlock) { + // DBG_VALUE complicates this: if the only refs of a register outside + // this block are DBG_VALUE, we can't keep the reg live just for that, + // as it will cause the reg to be spilled at the end of this block when + // it wouldn't have been otherwise. Nullify the DBG_VALUEs when that + // happens. + bool UsedByDebugValueOnly = false; for (MachineRegisterInfo::reg_iterator UI = MRI.reg_begin(MO.getReg()), UE = MRI.reg_end(); UI != UE; ++UI) // Two cases: @@ -699,12 +710,26 @@ void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) { // - used in the same block before it is defined (loop) if (UI->getParent() != &MBB || (MO.isDef() && UI.getOperand().isUse() && precedes(&*UI, MI))) { + if (UI->isDebugValue()) { + UsedByDebugValueOnly = true; + continue; + } + // A non-DBG_VALUE use means we can leave DBG_VALUE uses alone. 
UsedInMultipleBlocks.set(MO.getReg() - TargetRegisterInfo::FirstVirtualRegister); usedOutsideBlock = true; + UsedByDebugValueOnly = false; break; } - + if (UsedByDebugValueOnly) + for (MachineRegisterInfo::reg_iterator UI = MRI.reg_begin(MO.getReg()), + UE = MRI.reg_end(); UI != UE; ++UI) + if (UI->isDebugValue() && + (UI->getParent() != &MBB || + (MO.isDef() && precedes(&*UI, MI)))) + UI.getOperand().setReg(0U); + } + // Physical registers and those that are not live-out of the block // are killed/dead at their last use/def within this block. if (isPhysReg || !usedOutsideBlock) { @@ -764,8 +789,11 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { // Determine whether this is a copy instruction. The cases where the // source or destination are phys regs are handled specially. unsigned SrcCopyReg, DstCopyReg, SrcCopySubReg, DstCopySubReg; + unsigned SrcCopyPhysReg = 0U; bool isCopy = TII->isMoveInstr(*MI, SrcCopyReg, DstCopyReg, SrcCopySubReg, DstCopySubReg); + if (isCopy && TargetRegisterInfo::isVirtualRegister(SrcCopyReg)) + SrcCopyPhysReg = getVirt2PhysRegMapSlot(SrcCopyReg); // Loop over the implicit uses, making sure that they are at the head of the // use order list, so they don't get reallocated. @@ -793,7 +821,7 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { // have in them, then mark them unallocatable. // If any virtual regs are earlyclobber, allocate them now (before // freeing inputs that are killed). - if (MI->getOpcode()==TargetInstrInfo::INLINEASM) { + if (MI->isInlineAsm()) { for (unsigned i = 0; i != MI->getNumOperands(); ++i) { MachineOperand& MO = MI->getOperand(i); if (MO.isReg() && MO.isDef() && MO.isEarlyClobber() && @@ -838,6 +866,18 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { } } + // If a DBG_VALUE says something is located in a spilled register, + // change the DBG_VALUE to be undef, which prevents the register + // from being reloaded here. Doing that would change the generated + // code, unless another use immediately follows this instruction. + if (MI->isDebugValue() && + MI->getNumOperands()==3 && MI->getOperand(0).isReg()) { + unsigned VirtReg = MI->getOperand(0).getReg(); + if (VirtReg && TargetRegisterInfo::isVirtualRegister(VirtReg) && + !getVirt2PhysRegMapSlot(VirtReg)) + MI->getOperand(0).setReg(0U); + } + // Get the used operands into registers. This has the potential to spill // incoming values if we are out of registers. Note that we completely // ignore physical register uses here. We assume that if an explicit @@ -965,13 +1005,26 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { // If DestVirtReg already has a value, use it. if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg))) { + // If this is a copy try to reuse the input as the output; + // that will make the copy go away. // If this is a copy, the source reg is a phys reg, and // that reg is available, use that phys reg for DestPhysReg. + // If this is a copy, the source reg is a virtual reg, and + // the phys reg that was assigned to that virtual reg is now + // available, use that phys reg for DestPhysReg. (If it's now + // available that means this was the last use of the source.) 
if (isCopy && TargetRegisterInfo::isPhysicalRegister(SrcCopyReg) && isPhysRegAvailable(SrcCopyReg)) { DestPhysReg = SrcCopyReg; assignVirtToPhysReg(DestVirtReg, DestPhysReg); + } else if (isCopy && + TargetRegisterInfo::isVirtualRegister(SrcCopyReg) && + SrcCopyPhysReg && isPhysRegAvailable(SrcCopyPhysReg) && + MF->getRegInfo().getRegClass(DestVirtReg)-> + contains(SrcCopyPhysReg)) { + DestPhysReg = SrcCopyPhysReg; + assignVirtToPhysReg(DestVirtReg, DestPhysReg); } else DestPhysReg = getReg(MBB, MI, DestVirtReg); } diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index fc59653..2701faf 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -32,7 +32,7 @@ #define DEBUG_TYPE "regalloc" #include "PBQP/HeuristicSolver.h" -#include "PBQP/SimpleGraph.h" +#include "PBQP/Graph.h" #include "PBQP/Heuristics/Briggs.h" #include "VirtRegMap.h" #include "VirtRegRewriter.h" @@ -58,12 +58,12 @@ using namespace llvm; static RegisterRegAlloc registerPBQPRepAlloc("pbqp", "PBQP register allocator.", - llvm::createPBQPRegisterAllocator); + llvm::createPBQPRegisterAllocator); static cl::opt<bool> pbqpCoalescing("pbqp-coalescing", - cl::desc("Attempt coalescing during PBQP register allocation."), - cl::init(false), cl::Hidden); + cl::desc("Attempt coalescing during PBQP register allocation."), + cl::init(false), cl::Hidden); namespace { @@ -114,6 +114,8 @@ namespace { typedef std::set<LiveInterval*> LiveIntervalSet; + typedef std::vector<PBQP::Graph::NodeItr> NodeVector; + MachineFunction *mf; const TargetMachine *tm; const TargetRegisterInfo *tri; @@ -130,6 +132,7 @@ namespace { AllowedSetMap allowedSets; LiveIntervalSet vregIntervalsToAlloc, emptyVRegIntervals; + NodeVector problemNodes; /// Builds a PBQP cost vector. @@ -174,7 +177,7 @@ namespace { /// allocation problem for this function. /// /// @return a PBQP solver object for the register allocation problem. - PBQP::SimpleGraph constructPBQPProblem(); + PBQP::Graph constructPBQPProblem(); /// \brief Adds a stack interval if the given live interval has been /// spilled. Used to support stack slot coloring. @@ -408,16 +411,16 @@ PBQPRegAlloc::CoalesceMap PBQPRegAlloc::findCoalesces() { // We also need any physical regs to be allocable, coalescing with // a non-allocable register is invalid. if (srcRegIsPhysical) { - if (std::find(srcRegClass->allocation_order_begin(*mf), - srcRegClass->allocation_order_end(*mf), srcReg) == - srcRegClass->allocation_order_end(*mf)) + if (std::find(dstRegClass->allocation_order_begin(*mf), + dstRegClass->allocation_order_end(*mf), srcReg) == + dstRegClass->allocation_order_end(*mf)) continue; } if (dstRegIsPhysical) { - if (std::find(dstRegClass->allocation_order_begin(*mf), - dstRegClass->allocation_order_end(*mf), dstReg) == - dstRegClass->allocation_order_end(*mf)) + if (std::find(srcRegClass->allocation_order_begin(*mf), + srcRegClass->allocation_order_end(*mf), dstReg) == + srcRegClass->allocation_order_end(*mf)) continue; } @@ -439,6 +442,12 @@ PBQPRegAlloc::CoalesceMap PBQPRegAlloc::findCoalesces() { vniItr = srcLI->vni_begin(), vniEnd = srcLI->vni_end(); vniItr != vniEnd; ++vniItr) { + // If we find a poorly defined def we err on the side of caution. + if (!(*vniItr)->def.isValid()) { + badDef = true; + break; + } + // If we find a def that kills the coalescing opportunity then // record it and break from the loop. 
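// [Editor's sketch] The copy-reuse heuristic just added, reduced to its
// decision: for a copy whose virtual source had a physical register that is
// free again (meaning the copy was the last use of the source), hand that
// same physical register to the destination so the copy becomes a no-op.
// The boolean parameters stand in for the allocator queries used above:
inline unsigned chooseDestPhys(bool IsCopy, unsigned SrcCopyPhysReg,
                               bool SrcPhysNowFree, bool DestClassContainsIt,
                               unsigned NormallyAllocatedReg) {
  if (IsCopy && SrcCopyPhysReg && SrcPhysNowFree && DestClassContainsIt)
    return SrcCopyPhysReg;     // Source and dest share a register: dead copy.
  return NormallyAllocatedReg; // Otherwise fall back to getReg().
}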
if (dstLI->liveAt((*vniItr)->def)) { @@ -460,6 +469,11 @@ PBQPRegAlloc::CoalesceMap PBQPRegAlloc::findCoalesces() { if ((*vniItr)->getCopy() == instr) continue; + if (!(*vniItr)->def.isValid()) { + badDef = true; + break; + } + if (srcLI->liveAt((*vniItr)->def)) { badDef = true; break; @@ -510,11 +524,10 @@ void PBQPRegAlloc::findVRegIntervalsToAlloc() { } } -PBQP::SimpleGraph PBQPRegAlloc::constructPBQPProblem() { +PBQP::Graph PBQPRegAlloc::constructPBQPProblem() { typedef std::vector<const LiveInterval*> LIVector; typedef std::vector<unsigned> RegVector; - typedef std::vector<PBQP::SimpleGraph::NodeIterator> NodeVector; // This will store the physical intervals for easy reference. LIVector physIntervals; @@ -553,8 +566,8 @@ PBQP::SimpleGraph PBQPRegAlloc::constructPBQPProblem() { } // Construct a PBQP solver for this problem - PBQP::SimpleGraph problem; - NodeVector problemNodes(vregIntervalsToAlloc.size()); + PBQP::Graph problem; + problemNodes.resize(vregIntervalsToAlloc.size()); // Resize allowedSets container appropriately. allowedSets.resize(vregIntervalsToAlloc.size()); @@ -657,12 +670,7 @@ PBQP::SimpleGraph PBQPRegAlloc::constructPBQPProblem() { } } - problem.assignNodeIDs(); - assert(problem.getNumNodes() == allowedSets.size()); - for (unsigned i = 0; i < allowedSets.size(); ++i) { - assert(problem.getNodeItr(i) == problemNodes[i]); - } /* std::cerr << "Allocating for " << problem.getNumNodes() << " nodes, " << problem.getNumEdges() << " edges.\n"; @@ -696,10 +704,6 @@ void PBQPRegAlloc::addStackInterval(const LiveInterval *spilled, bool PBQPRegAlloc::mapPBQPToRegAlloc(const PBQP::Solution &solution) { - // Assert that this is a valid solution to the regalloc problem. - assert(solution.getCost() != std::numeric_limits<PBQP::PBQPNum>::infinity() && - "Invalid (infinite cost) solution for PBQP problem."); - // Set to true if we have any spills bool anotherRoundNeeded = false; @@ -709,7 +713,7 @@ bool PBQPRegAlloc::mapPBQPToRegAlloc(const PBQP::Solution &solution) { // Iterate over the nodes mapping the PBQP solution to a register assignment. for (unsigned node = 0; node < node2LI.size(); ++node) { unsigned virtReg = node2LI[node]->reg, - allocSelection = solution.getSelection(node); + allocSelection = solution.getSelection(problemNodes[node]); // If the PBQP solution is non-zero it's a physical register... 
@@ -849,7 +853,7 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) { vrm = &getAnalysis<VirtRegMap>(); - DEBUG(dbgs() << "PBQP2 Register Allocating for " << mf->getFunction()->getName() << "\n"); + DEBUG(dbgs() << "PBQP Register Allocating for " << mf->getFunction()->getName() << "\n"); // Allocator main loop: // @@ -876,10 +880,9 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) { while (!pbqpAllocComplete) { DEBUG(dbgs() << " PBQP Regalloc round " << round << ":\n"); - PBQP::SimpleGraph problem = constructPBQPProblem(); - PBQP::HeuristicSolver<PBQP::Heuristics::Briggs> solver; - problem.assignNodeIDs(); - PBQP::Solution solution = solver.solve(problem); + PBQP::Graph problem = constructPBQPProblem(); + PBQP::Solution solution = + PBQP::HeuristicSolver<PBQP::Heuristics::Briggs>::solve(problem); pbqpAllocComplete = mapPBQPToRegAlloc(solution); @@ -895,6 +898,7 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) { li2Node.clear(); node2LI.clear(); allowedSets.clear(); + problemNodes.clear(); DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *vrm << "\n"); diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 8883064..7da7848 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1881,7 +1881,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), MemVT, - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); AddToWorkList(N); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -1903,7 +1904,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), MemVT, - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); AddToWorkList(N); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -1935,7 +1937,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy, LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), - ExtVT, LN0->isVolatile(), LN0->getAlignment()); + ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); AddToWorkList(N); CombineTo(LN0, NewLoad, NewLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -1970,7 +1973,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy, LN0->getChain(), NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset(), - ExtVT, LN0->isVolatile(), Alignment); + ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), + Alignment); AddToWorkList(N); CombineTo(LN0, Load, Load.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -2640,7 +2644,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // If the shift is not a no-op (in which case this should be just a sign // extend already), the truncated to type is legal, sign_extend is legal - // on that type, and the the truncate to that type is both legal and free, + // on that type, and the truncate to that type is both legal and free, // perform the transform. 
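// [Editor's note] The long run of mechanical DAGCombiner hunks that follows
// threads the new isNonTemporal() flag through every place a load or store
// node is rebuilt. A sketch of the underlying hazard with positional bool
// parameters, and why carrying the flags as one value is harder to get
// wrong. Hypothetical types, not the SelectionDAG API:
struct MemFlags {
  bool Volatile;
  bool NonTemporal;
  unsigned Alignment;
};
struct MemNodeModel {
  MemFlags Flags;
  explicit MemNodeModel(const MemFlags &F) : Flags(F) {}
  // Copying the whole struct cannot silently drop NonTemporal the way an
  // extra `bool` argument can be forgotten at one of dozens of call sites.
  MemNodeModel rebuildWithAlign(unsigned NewAlign) const {
    MemNodeModel N(*this);
    N.Flags.Alignment = NewAlign;
    return N;
  }
};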
if ((ShiftAmt > 0) && TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) && @@ -3143,7 +3147,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), N0.getValueType(), - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); CombineTo(N, ExtLoad); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(), ExtLoad); @@ -3185,7 +3190,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), MemVT, - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), @@ -3220,6 +3226,14 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { NegOne, DAG.getConstant(0, VT), cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); if (SCC.getNode()) return SCC; + if (!LegalOperations || + TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(VT))) + return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT, + DAG.getSetCC(N->getDebugLoc(), + TLI.getSetCCResultType(VT), + N0.getOperand(0), N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()), + NegOne, DAG.getConstant(0, VT)); } @@ -3307,7 +3321,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), N0.getValueType(), - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); CombineTo(N, ExtLoad); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(), ExtLoad); @@ -3349,7 +3364,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), MemVT, - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(), @@ -3463,7 +3479,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), N0.getValueType(), - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); CombineTo(N, ExtLoad); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(), ExtLoad); @@ -3505,7 +3522,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { VT, LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), MemVT, - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), @@ -3628,10 +3646,11 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { SDValue Load = (ExtType == ISD::NON_EXTLOAD) ? DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff, - LN0->isVolatile(), NewAlign) + LN0->isVolatile(), LN0->isNonTemporal(), NewAlign) : DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(), NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff, - ExtVT, LN0->isVolatile(), NewAlign); + ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), + NewAlign); // Replace the old load's chain with the new load's chain. 
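// [Editor's sketch] The new visitSIGN_EXTEND fallback above rests on the
// identity sext(setcc(a, b)) == select(setcc(a, b), -1, 0): sign-extending
// a 1-bit true yields all ones. Scalar model, with `<` standing in for an
// arbitrary condition code:
#include <cstdint>
inline int32_t sextOfSetcc(int32_t a, int32_t b) {
  bool cc = (a < b);           // the setcc
  return cc ? int32_t(-1) : 0; // sext(i1) == select(cc, -1, 0)
}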
WorkListRemover DeadNodes(*this); @@ -3718,7 +3737,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), EVT, - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -3734,7 +3754,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), EVT, - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -3818,7 +3839,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(), LD1->getBasePtr(), LD1->getSrcValue(), - LD1->getSrcValueOffset(), false, Align); + LD1->getSrcValueOffset(), false, false, Align); } return SDValue(); @@ -3888,7 +3909,8 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), - LN0->isVolatile(), OrigAlign); + LN0->isVolatile(), LN0->isNonTemporal(), + OrigAlign); AddToWorkList(N); CombineTo(N0.getNode(), DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), @@ -4484,7 +4506,8 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), N0.getValueType(), - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(), @@ -4952,7 +4975,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { LD->getValueType(0), Chain, Ptr, LD->getSrcValue(), LD->getSrcValueOffset(), LD->getMemoryVT(), - LD->isVolatile(), Align); + LD->isVolatile(), LD->isNonTemporal(), Align); } } @@ -5034,7 +5057,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(), BetterChain, Ptr, LD->getSrcValue(), LD->getSrcValueOffset(), - LD->isVolatile(), LD->getAlignment()); + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); } else { ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(), LD->getValueType(0), @@ -5042,6 +5066,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { LD->getSrcValueOffset(), LD->getMemoryVT(), LD->isVolatile(), + LD->isNonTemporal(), LD->getAlignment()); } @@ -5141,13 +5166,14 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(), LD->getChain(), NewPtr, LD->getSrcValue(), LD->getSrcValueOffset(), - LD->isVolatile(), NewAlign); + LD->isVolatile(), LD->isNonTemporal(), + NewAlign); SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD, DAG.getConstant(NewImm, NewVT)); SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(), NewVal, NewPtr, ST->getSrcValue(), ST->getSrcValueOffset(), - false, NewAlign); + false, false, NewAlign); AddToWorkList(NewPtr.getNode()); AddToWorkList(NewLD.getNode()); @@ -5176,7 +5202,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return DAG.getTruncStore(Chain, N->getDebugLoc(), Value, Ptr, ST->getSrcValue(), 
ST->getSrcValueOffset(), ST->getMemoryVT(), - ST->isVolatile(), Align); + ST->isVolatile(), ST->isNonTemporal(), Align); } } @@ -5193,7 +5219,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { TLI.isOperationLegalOrCustom(ISD::STORE, SVT))) return DAG.getStore(Chain, N->getDebugLoc(), Value.getOperand(0), Ptr, ST->getSrcValue(), - ST->getSrcValueOffset(), ST->isVolatile(), OrigAlign); + ST->getSrcValueOffset(), ST->isVolatile(), + ST->isNonTemporal(), OrigAlign); } // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' @@ -5219,7 +5246,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return DAG.getStore(Chain, N->getDebugLoc(), Tmp, Ptr, ST->getSrcValue(), ST->getSrcValueOffset(), ST->isVolatile(), - ST->getAlignment()); + ST->isNonTemporal(), ST->getAlignment()); } break; case MVT::f64: @@ -5231,7 +5258,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return DAG.getStore(Chain, N->getDebugLoc(), Tmp, Ptr, ST->getSrcValue(), ST->getSrcValueOffset(), ST->isVolatile(), - ST->getAlignment()); + ST->isNonTemporal(), ST->getAlignment()); } else if (!ST->isVolatile() && TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { // Many FP stores are not made apparent until after legalize, e.g. for @@ -5245,18 +5272,21 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { int SVOffset = ST->getSrcValueOffset(); unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); SDValue St0 = DAG.getStore(Chain, ST->getDebugLoc(), Lo, Ptr, ST->getSrcValue(), ST->getSrcValueOffset(), - isVolatile, ST->getAlignment()); + isVolatile, isNonTemporal, + ST->getAlignment()); Ptr = DAG.getNode(ISD::ADD, N->getDebugLoc(), Ptr.getValueType(), Ptr, DAG.getConstant(4, Ptr.getValueType())); SVOffset += 4; Alignment = MinAlign(Alignment, 4U); SDValue St1 = DAG.getStore(Chain, ST->getDebugLoc(), Hi, Ptr, ST->getSrcValue(), - SVOffset, isVolatile, Alignment); + SVOffset, isVolatile, isNonTemporal, + Alignment); return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, St0, St1); } @@ -5278,12 +5308,13 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (ST->isTruncatingStore()) { ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr, ST->getSrcValue(),ST->getSrcValueOffset(), - ST->getMemoryVT(), - ST->isVolatile(), ST->getAlignment()); + ST->getMemoryVT(), ST->isVolatile(), + ST->isNonTemporal(), ST->getAlignment()); } else { ReplStore = DAG.getStore(BetterChain, N->getDebugLoc(), Value, Ptr, ST->getSrcValue(), ST->getSrcValueOffset(), - ST->isVolatile(), ST->getAlignment()); + ST->isVolatile(), ST->isNonTemporal(), + ST->getAlignment()); } // Create token to keep both nodes around. @@ -5317,7 +5348,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter, Ptr, ST->getSrcValue(), ST->getSrcValueOffset(), ST->getMemoryVT(), - ST->isVolatile(), ST->getAlignment()); + ST->isVolatile(), ST->isNonTemporal(), + ST->getAlignment()); // Otherwise, see if we can simplify the operation with // SimplifyDemandedBits, which only works if the value has a single use. 
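// [Editor's sketch] The f64 path above, as plain memory operations: when
// only 32-bit integer stores are legal, a 64-bit FP store becomes two
// 32-bit stores at offsets 0 and 4, the second with alignment clamped to
// MinAlign(orig, 4). Little-endian host assumed for the lo/hi order:
#include <cstdint>
#include <cstring>
inline void storeF64AsTwoI32(double V, unsigned char *Ptr) {
  uint64_t Bits;
  std::memcpy(&Bits, &V, 8);          // bitcast f64 -> i64
  uint32_t Lo = uint32_t(Bits);       // low word
  uint32_t Hi = uint32_t(Bits >> 32); // high word
  std::memcpy(Ptr, &Lo, 4);           // store at SVOffset
  std::memcpy(Ptr + 4, &Hi, 4);       // store at SVOffset + 4
}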
@@ -5350,7 +5382,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return DAG.getTruncStore(Chain, N->getDebugLoc(), Value.getOperand(0), Ptr, ST->getSrcValue(), ST->getSrcValueOffset(), ST->getMemoryVT(), - ST->isVolatile(), ST->getAlignment()); + ST->isVolatile(), ST->isNonTemporal(), + ST->getAlignment()); } return ReduceLoadOpStoreWidth(N); @@ -5395,12 +5428,16 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { SDValue InVec = N->getOperand(0); if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) { - // If the operand is wider than the vector element type then it is implicitly - // truncated. Make that explicit here. + // Check if the result type doesn't match the inserted element type. A + // SCALAR_TO_VECTOR may truncate the inserted element and the + // EXTRACT_VECTOR_ELT may widen the extracted vector. EVT EltVT = InVec.getValueType().getVectorElementType(); SDValue InOp = InVec.getOperand(0); - if (InOp.getValueType() != EltVT) - return DAG.getNode(ISD::TRUNCATE, InVec.getDebugLoc(), EltVT, InOp); + EVT NVT = N->getValueType(0); + if (InOp.getValueType() != NVT) { + assert(InOp.getValueType().isInteger() && NVT.isInteger()); + return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT); + } return InOp; } @@ -5491,7 +5528,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { return DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset(), - LN0->isVolatile(), Align); + LN0->isVolatile(), LN0->isNonTemporal(), Align); } return SDValue(); @@ -5871,6 +5908,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, LLD->getChain(), Addr, 0, 0, LLD->isVolatile(), + LLD->isNonTemporal(), LLD->getAlignment()); } else { Load = DAG.getExtLoad(LLD->getExtensionType(), @@ -5879,6 +5917,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, LLD->getChain(), Addr, 0, 0, LLD->getMemoryVT(), LLD->isVolatile(), + LLD->isNonTemporal(), LLD->getAlignment()); } @@ -5986,7 +6025,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, CstOffset); return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, PseudoSourceValue::getConstantPool(), 0, false, - Alignment); + false, Alignment); } } diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 09fd657..35ef5b7 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -121,7 +121,7 @@ unsigned FastISel::getRegForValue(Value *V) { Reg = LocalValueMap[CE]; } else if (isa<UndefValue>(V)) { Reg = createResultReg(TLI.getRegClassFor(VT)); - BuildMI(MBB, DL, TII.get(TargetInstrInfo::IMPLICIT_DEF), Reg); + BuildMI(MBB, DL, TII.get(TargetOpcode::IMPLICIT_DEF), Reg); } // If target-independent code couldn't handle the value, give target-specific @@ -332,6 +332,8 @@ bool FastISel::SelectCall(User *I) { return true; Value *Address = DI->getAddress(); + if (!Address) + return true; AllocaInst *AI = dyn_cast<AllocaInst>(Address); // Don't handle byval struct arguments or VLAs, for example. if (!AI) break; @@ -343,6 +345,9 @@ bool FastISel::SelectCall(User *I) { if (MDNode *Dbg = DI->getMetadata("dbg")) MMI->setVariableDbgInfo(DI->getVariable(), FI, Dbg); } + // Building the map above is target independent. Generating DBG_VALUE + // inline is target dependent; do this now. 
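// [Editor's sketch] getSExtOrTrunc, as used in the EXTRACT_VECTOR_ELT hunk
// below: if the demanded type is wider than the inserted scalar the value
// is sign-extended, if narrower it is truncated. Integer model over bit
// widths (0 < bits <= 64); the final cast models reinterpreting the kept
// bits as a signed value:
#include <cstdint>
inline int64_t sextOrTrunc(uint64_t V, unsigned FromBits, unsigned ToBits) {
  unsigned Keep = ToBits < FromBits ? ToBits : FromBits;
  uint64_t Mask = Keep >= 64 ? ~0ULL : ((1ULL << Keep) - 1);
  uint64_t Low = V & Mask;               // truncate to Keep bits
  uint64_t Sign = 1ULL << (Keep - 1);
  return (int64_t)((Low ^ Sign) - Sign); // then sign-extend from Keep bits
}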
+ (void)TargetSelectInstruction(cast<Instruction>(I)); return true; } case Intrinsic::eh_exception: { @@ -966,7 +971,7 @@ unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT, const TargetRegisterClass* RC = MRI.getRegClass(Op0); unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); - const TargetInstrDesc &II = TII.get(TargetInstrInfo::EXTRACT_SUBREG); + const TargetInstrDesc &II = TII.get(TargetOpcode::EXTRACT_SUBREG); if (II.getNumDefs() >= 1) BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addImm(Idx); diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index dc7d82d..50f4c32 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -227,7 +227,7 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf, unsigned NumRegisters = TLI.getNumRegisters(Fn->getContext(), VT); const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); for (unsigned i = 0; i != NumRegisters; ++i) - BuildMI(MBB, DL, TII->get(TargetInstrInfo::PHI), PHIReg + i); + BuildMI(MBB, DL, TII->get(TargetOpcode::PHI), PHIReg + i); PHIReg += NumRegisters; } } diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 9c50936..02fe85d 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -178,7 +178,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, const TargetInstrDesc &II, bool IsClone, bool IsCloned, DenseMap<SDValue, unsigned> &VRBaseMap) { - assert(Node->getMachineOpcode() != TargetInstrInfo::IMPLICIT_DEF && + assert(Node->getMachineOpcode() != TargetOpcode::IMPLICIT_DEF && "IMPLICIT_DEF should have been handled as a special case elsewhere!"); for (unsigned i = 0; i < II.getNumDefs(); ++i) { @@ -236,7 +236,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, unsigned InstrEmitter::getVR(SDValue Op, DenseMap<SDValue, unsigned> &VRBaseMap) { if (Op.isMachineOpcode() && - Op.getMachineOpcode() == TargetInstrInfo::IMPLICIT_DEF) { + Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) { // Add an IMPLICIT_DEF instruction before every use. unsigned VReg = getDstOfOnlyCopyToRegUse(Op.getNode(), Op.getResNo()); // IMPLICIT_DEF can produce any type of result so its TargetInstrDesc @@ -246,7 +246,7 @@ unsigned InstrEmitter::getVR(SDValue Op, VReg = MRI->createVirtualRegister(RC); } BuildMI(MBB, Op.getDebugLoc(), - TII->get(TargetInstrInfo::IMPLICIT_DEF), VReg); + TII->get(TargetOpcode::IMPLICIT_DEF), VReg); return VReg; } @@ -396,12 +396,12 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, } } - if (Opc == TargetInstrInfo::EXTRACT_SUBREG) { + if (Opc == TargetOpcode::EXTRACT_SUBREG) { unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); // Create the extract_subreg machine instruction. MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), - TII->get(TargetInstrInfo::EXTRACT_SUBREG)); + TII->get(TargetOpcode::EXTRACT_SUBREG)); // Figure out the register class to create for the destreg. 
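// [Editor's sketch] The dbg_declare guard above in isolation: after
// optimization the intrinsic's address operand can be null, so the handler
// must bail out (returning true, i.e. "handled, nothing to emit") before
// any cast of the address. Hypothetical types:
struct DbgDeclareModel { void *Address; };
inline bool selectDbgDeclare(const DbgDeclareModel &DI) {
  if (!DI.Address)
    return true; // No location survived optimization: emit nothing.
  // ...record the frame index for the variable, then give the target a
  // chance to emit a DBG_VALUE inline (the TargetSelectInstruction call).
  return true;
}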
unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); @@ -424,8 +424,8 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap); MI->addOperand(MachineOperand::CreateImm(SubIdx)); MBB->insert(InsertPos, MI); - } else if (Opc == TargetInstrInfo::INSERT_SUBREG || - Opc == TargetInstrInfo::SUBREG_TO_REG) { + } else if (Opc == TargetOpcode::INSERT_SUBREG || + Opc == TargetOpcode::SUBREG_TO_REG) { SDValue N0 = Node->getOperand(0); SDValue N1 = Node->getOperand(1); SDValue N2 = Node->getOperand(2); @@ -452,7 +452,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, // If creating a subreg_to_reg, then the first input operand // is an implicit value immediate, otherwise it's a register - if (Opc == TargetInstrInfo::SUBREG_TO_REG) { + if (Opc == TargetOpcode::SUBREG_TO_REG) { const ConstantSDNode *SD = cast<ConstantSDNode>(N0); MI->addOperand(MachineOperand::CreateImm(SD->getZExtValue())); } else @@ -507,20 +507,20 @@ void InstrEmitter::EmitNode(SDNode *Node, bool IsClone, bool IsCloned, unsigned Opc = Node->getMachineOpcode(); // Handle subreg insert/extract specially - if (Opc == TargetInstrInfo::EXTRACT_SUBREG || - Opc == TargetInstrInfo::INSERT_SUBREG || - Opc == TargetInstrInfo::SUBREG_TO_REG) { + if (Opc == TargetOpcode::EXTRACT_SUBREG || + Opc == TargetOpcode::INSERT_SUBREG || + Opc == TargetOpcode::SUBREG_TO_REG) { EmitSubregNode(Node, VRBaseMap); return; } // Handle COPY_TO_REGCLASS specially. - if (Opc == TargetInstrInfo::COPY_TO_REGCLASS) { + if (Opc == TargetOpcode::COPY_TO_REGCLASS) { EmitCopyToRegClassNode(Node, VRBaseMap); return; } - if (Opc == TargetInstrInfo::IMPLICIT_DEF) + if (Opc == TargetOpcode::IMPLICIT_DEF) // We want a unique VR for each IMPLICIT_DEF use. return; @@ -640,7 +640,7 @@ void InstrEmitter::EmitNode(SDNode *Node, bool IsClone, bool IsCloned, // Create the inline asm machine instruction. MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), - TII->get(TargetInstrInfo::INLINEASM)); + TII->get(TargetOpcode::INLINEASM)); // Add the asm string as an external symbol operand. const char *AsmStr = diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 5e3f58a..e9321da 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -377,9 +377,10 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP, return DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT, DAG.getEntryNode(), CPIdx, PseudoSourceValue::getConstantPool(), - 0, VT, false, Alignment); + 0, VT, false, false, Alignment); return DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, false, Alignment); + PseudoSourceValue::getConstantPool(), 0, false, false, + Alignment); } /// ExpandUnalignedStore - Expands an unaligned store to 2 half-size stores. @@ -402,7 +403,8 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // FIXME: Does not handle truncating floating point stores! SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, intVT, Val); return DAG.getStore(Chain, dl, Result, Ptr, ST->getSrcValue(), - SVOffset, ST->isVolatile(), Alignment); + SVOffset, ST->isVolatile(), ST->isNonTemporal(), + Alignment); } else { // Do a (aligned) store to a stack slot, then copy from the stack slot // to the final destination using (unaligned) integer loads and stores. @@ -418,7 +420,8 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // Perform the original store, only redirected to the stack slot. 
SDValue Store = DAG.getTruncStore(Chain, dl, - Val, StackPtr, NULL, 0, StoredVT); + Val, StackPtr, NULL, 0, StoredVT, + false, false, 0); SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy()); SmallVector<SDValue, 8> Stores; unsigned Offset = 0; @@ -426,11 +429,12 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // Do all but one copies using the full register width. for (unsigned i = 1; i < NumRegs; i++) { // Load one integer register's worth from the stack slot. - SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, NULL, 0); + SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, NULL, 0, + false, false, 0); // Store it to the final location. Remember the store. Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr, ST->getSrcValue(), SVOffset + Offset, - ST->isVolatile(), + ST->isVolatile(), ST->isNonTemporal(), MinAlign(ST->getAlignment(), Offset))); // Increment the pointers. Offset += RegBytes; @@ -446,11 +450,12 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // Load from the stack slot. SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr, - NULL, 0, MemVT); + NULL, 0, MemVT, false, false, 0); Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr, ST->getSrcValue(), SVOffset + Offset, MemVT, ST->isVolatile(), + ST->isNonTemporal(), MinAlign(ST->getAlignment(), Offset))); // The order of the stores doesn't matter - say it with a TokenFactor. return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], @@ -474,13 +479,14 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, SDValue Store1, Store2; Store1 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Lo:Hi, Ptr, ST->getSrcValue(), SVOffset, NewStoredVT, - ST->isVolatile(), Alignment); + ST->isVolatile(), ST->isNonTemporal(), Alignment); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, TLI.getPointerTy())); Alignment = MinAlign(Alignment, IncrementSize); Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr, ST->getSrcValue(), SVOffset + IncrementSize, - NewStoredVT, ST->isVolatile(), Alignment); + NewStoredVT, ST->isVolatile(), ST->isNonTemporal(), + Alignment); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); } @@ -502,7 +508,7 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, // then bitconvert to floating point or vector. SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset, LD->isVolatile(), - LD->getAlignment()); + LD->isNonTemporal(), LD->getAlignment()); SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, LoadedVT, newLoad); if (VT.isFloatingPoint() && LoadedVT != VT) Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result); @@ -530,10 +536,11 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, // Load one integer register's worth from the original location. SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset + Offset, LD->isVolatile(), + LD->isNonTemporal(), MinAlign(LD->getAlignment(), Offset)); // Follow the load with a store to the stack slot. Remember the store. Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr, - NULL, 0)); + NULL, 0, false, false, 0)); // Increment the pointers. 
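// [Editor's sketch] The chunked copy above as byte operations: an
// unaligned store is done by spilling to an aligned stack slot, then
// copying to the true destination in integer-register-sized pieces, with
// one final narrower piece for the remainder (the extload/truncstore pair):
#include <cstring>
inline void unalignedStoreViaStack(const void *AlignedSlot, void *Dst,
                                   unsigned StoredBytes, unsigned RegBytes) {
  unsigned Offset = 0;
  while (StoredBytes - Offset > RegBytes) { // all but the last piece
    std::memcpy(static_cast<char *>(Dst) + Offset,
                static_cast<const char *>(AlignedSlot) + Offset, RegBytes);
    Offset += RegBytes;
  }
  std::memcpy(static_cast<char *>(Dst) + Offset, // the remaining bytes
              static_cast<const char *>(AlignedSlot) + Offset,
              StoredBytes - Offset);
}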
Offset += RegBytes; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); @@ -546,12 +553,13 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr, LD->getSrcValue(), SVOffset + Offset, MemVT, LD->isVolatile(), + LD->isNonTemporal(), MinAlign(LD->getAlignment(), Offset)); // Follow the load with a store to the stack slot. Remember the store. // On big-endian machines this requires a truncating store to ensure // that the bits end up in the right place. Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr, - NULL, 0, MemVT)); + NULL, 0, MemVT, false, false, 0)); // The order of the stores doesn't matter - say it with a TokenFactor. SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], @@ -559,7 +567,7 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, // Finally, perform the original load only redirected to the stack slot. Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase, - NULL, 0, LoadedVT); + NULL, 0, LoadedVT, false, false, 0); // Callers expect a MERGE_VALUES node. SDValue Ops[] = { Load, TF }; @@ -588,20 +596,22 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, SDValue Lo, Hi; if (TLI.isLittleEndian()) { Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getSrcValue(), - SVOffset, NewLoadedVT, LD->isVolatile(), Alignment); + SVOffset, NewLoadedVT, LD->isVolatile(), + LD->isNonTemporal(), Alignment); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, TLI.getPointerTy())); Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getSrcValue(), SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(), - MinAlign(Alignment, IncrementSize)); + LD->isNonTemporal(), MinAlign(Alignment, IncrementSize)); } else { Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getSrcValue(), - SVOffset, NewLoadedVT, LD->isVolatile(), Alignment); + SVOffset, NewLoadedVT, LD->isVolatile(), + LD->isNonTemporal(), Alignment); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, TLI.getPointerTy())); Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getSrcValue(), SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(), - MinAlign(Alignment, IncrementSize)); + LD->isNonTemporal(), MinAlign(Alignment, IncrementSize)); } // aggregate the two parts @@ -643,7 +653,8 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, // Store the vector. SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Tmp1, StackPtr, - PseudoSourceValue::getFixedStack(SPFI), 0); + PseudoSourceValue::getFixedStack(SPFI), 0, + false, false, 0); // Truncate or zero extend offset to target pointer type. unsigned CastOpc = IdxVT.bitsGT(PtrVT) ? ISD::TRUNCATE : ISD::ZERO_EXTEND; @@ -654,10 +665,12 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, SDValue StackPtr2 = DAG.getNode(ISD::ADD, dl, IdxVT, Tmp3, StackPtr); // Store the scalar value. Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, - PseudoSourceValue::getFixedStack(SPFI), 0, EltVT); + PseudoSourceValue::getFixedStack(SPFI), 0, EltVT, + false, false, 0); // Load the updated vector. 
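// [Editor's sketch] PerformInsertVectorEltInMemory below, as three memory
// operations: spill the vector to a stack temporary, overwrite element Idx
// at base + Idx * eltSize, then reload the whole vector:
#include <cstring>
inline void insertEltViaStack(void *Vec, unsigned VecBytes, const void *Elt,
                              unsigned EltBytes, unsigned Idx,
                              void *StackSlot) {
  std::memcpy(StackSlot, Vec, VecBytes);                    // store vector
  std::memcpy(static_cast<char *>(StackSlot) + Idx * EltBytes,
              Elt, EltBytes);                               // store scalar
  std::memcpy(Vec, StackSlot, VecBytes);                    // reload vector
}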
return DAG.getLoad(VT, dl, Ch, StackPtr, - PseudoSourceValue::getFixedStack(SPFI), 0); + PseudoSourceValue::getFixedStack(SPFI), 0, + false, false, 0); } @@ -702,6 +715,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { int SVOffset = ST->getSrcValueOffset(); unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); DebugLoc dl = ST->getDebugLoc(); if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) { if (CFP->getValueType(0) == MVT::f32 && @@ -710,14 +724,14 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { bitcastToAPInt().zextOrTrunc(32), MVT::i32); return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), - SVOffset, isVolatile, Alignment); + SVOffset, isVolatile, isNonTemporal, Alignment); } else if (CFP->getValueType(0) == MVT::f64) { // If this target supports 64-bit registers, do a single 64-bit store. if (getTypeAction(MVT::i64) == Legal) { Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). zextOrTrunc(64), MVT::i64); return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), - SVOffset, isVolatile, Alignment); + SVOffset, isVolatile, isNonTemporal, Alignment); } else if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) { // Otherwise, if the target supports 32-bit registers, use 2 32-bit // stores. If the target supports neither 32- nor 64-bits, this @@ -728,11 +742,11 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { if (TLI.isBigEndian()) std::swap(Lo, Hi); Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getSrcValue(), - SVOffset, isVolatile, Alignment); + SVOffset, isVolatile, isNonTemporal, Alignment); Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, DAG.getIntPtrConstant(4)); Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), SVOffset+4, - isVolatile, MinAlign(Alignment, 4U)); + isVolatile, isNonTemporal, MinAlign(Alignment, 4U)); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -1108,7 +1122,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getSrcValue(), LD->getSrcValueOffset(), - LD->isVolatile(), LD->getAlignment()); + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); Tmp3 = LegalizeOp(DAG.getNode(ISD::BIT_CONVERT, dl, VT, Tmp1)); Tmp4 = LegalizeOp(Tmp1.getValue(1)); break; @@ -1125,6 +1140,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { int SVOffset = LD->getSrcValueOffset(); unsigned Alignment = LD->getAlignment(); bool isVolatile = LD->isVolatile(); + bool isNonTemporal = LD->isNonTemporal(); if (SrcWidth != SrcVT.getStoreSizeInBits() && // Some targets pretend to have an i1 loading operation, and actually @@ -1150,7 +1166,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), Tmp1, Tmp2, LD->getSrcValue(), SVOffset, - NVT, isVolatile, Alignment); + NVT, isVolatile, isNonTemporal, Alignment); Ch = Result.getValue(1); // The chain. @@ -1187,7 +1203,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Tmp1, Tmp2, LD->getSrcValue(), SVOffset, RoundVT, isVolatile, - Alignment); + isNonTemporal, Alignment); // Load the remaining ExtraWidth bits. 
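// [Editor's sketch] OptimizeFloatStore's f32 case above: a store of a float
// constant is rewritten as a store of its 32-bit integer image, avoiding an
// FP register and a constant-pool load. Host-side model of the bit image:
#include <cstdint>
#include <cstring>
inline uint32_t floatBitsToStore(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, 4); // the bitcastToAPInt().zextOrTrunc(32) above
  return Bits;               // store this i32 in place of the f32
}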
IncrementSize = RoundWidth / 8; @@ -1195,7 +1211,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { DAG.getIntPtrConstant(IncrementSize)); Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2, LD->getSrcValue(), SVOffset + IncrementSize, - ExtraVT, isVolatile, + ExtraVT, isVolatile, isNonTemporal, MinAlign(Alignment, IncrementSize)); // Build a factor node to remember that this load is independent of the @@ -1215,7 +1231,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // Load the top RoundWidth bits. Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2, LD->getSrcValue(), SVOffset, RoundVT, isVolatile, - Alignment); + isNonTemporal, Alignment); // Load the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; @@ -1224,7 +1240,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Tmp1, Tmp2, LD->getSrcValue(), SVOffset + IncrementSize, - ExtraVT, isVolatile, + ExtraVT, isVolatile, isNonTemporal, MinAlign(Alignment, IncrementSize)); // Build a factor node to remember that this load is independent of the @@ -1284,7 +1300,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { (SrcVT == MVT::f64 && Node->getValueType(0) == MVT::f128)) { SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, LD->getSrcValue(), LD->getSrcValueOffset(), - LD->isVolatile(), LD->getAlignment()); + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); Result = DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Load); Tmp1 = LegalizeOp(Result); // Relegalize new nodes. @@ -1297,7 +1314,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), Tmp1, Tmp2, LD->getSrcValue(), LD->getSrcValueOffset(), SrcVT, - LD->isVolatile(), LD->getAlignment()); + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); SDValue ValRes; if (ExtType == ISD::SEXTLOAD) ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, @@ -1325,6 +1343,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { int SVOffset = ST->getSrcValueOffset(); unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); if (!ST->isTruncatingStore()) { if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) { @@ -1361,7 +1380,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3); Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), SVOffset, isVolatile, - Alignment); + isNonTemporal, Alignment); break; } break; @@ -1379,7 +1398,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { EVT NVT = EVT::getIntegerVT(*DAG.getContext(), StVT.getStoreSizeInBits()); Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT); Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), - SVOffset, NVT, isVolatile, Alignment); + SVOffset, NVT, isVolatile, isNonTemporal, + Alignment); } else if (StWidth & (StWidth - 1)) { // If not storing a power-of-2 number of bits, expand as two stores. assert(!StVT.isVector() && "Unsupported truncstore!"); @@ -1399,7 +1419,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // Store the bottom RoundWidth bits. Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), SVOffset, RoundVT, - isVolatile, Alignment); + isVolatile, isNonTemporal, Alignment); // Store the remaining ExtraWidth bits. 
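// [Editor's sketch] The round/extra split above: a load of a
// non-power-of-2 width (say i24) becomes a power-of-2 "round" part plus an
// "extra" part at byte offset RoundWidth/8, recombined with a shift.
// Little-endian model; widths in bits, multiples of 8 and below 64:
#include <cstdint>
#include <cstring>
inline uint64_t loadOddWidthLE(const unsigned char *P, unsigned Width,
                               unsigned RoundWidth) {
  unsigned ExtraWidth = Width - RoundWidth; // e.g. 24 = 16 (round) + 8
  uint64_t Lo = 0, Hi = 0;
  std::memcpy(&Lo, P, RoundWidth / 8);                  // round part
  std::memcpy(&Hi, P + RoundWidth / 8, ExtraWidth / 8); // extra part
  return Lo | (Hi << RoundWidth);           // shift hi into position
}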
IncrementSize = RoundWidth / 8; @@ -1409,6 +1429,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { DAG.getConstant(RoundWidth, TLI.getShiftAmountTy())); Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), SVOffset + IncrementSize, ExtraVT, isVolatile, + isNonTemporal, MinAlign(Alignment, IncrementSize)); } else { // Big endian - avoid unaligned stores. @@ -1417,7 +1438,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3, DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy())); Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), - SVOffset, RoundVT, isVolatile, Alignment); + SVOffset, RoundVT, isVolatile, isNonTemporal, + Alignment); // Store the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; @@ -1425,6 +1447,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { DAG.getIntPtrConstant(IncrementSize)); Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), SVOffset + IncrementSize, ExtraVT, isVolatile, + isNonTemporal, MinAlign(Alignment, IncrementSize)); } @@ -1457,7 +1480,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { assert(isTypeLegal(StVT) && "Do not know how to expand this store!"); Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3); Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), - SVOffset, isVolatile, Alignment); + SVOffset, isVolatile, isNonTemporal, + Alignment); break; } } @@ -1484,7 +1508,8 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { DebugLoc dl = Op.getDebugLoc(); // Store the value to a temporary stack slot, then LOAD the returned part. SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType()); - SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0); + SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0, + false, false, 0); // Add the offset to the index. unsigned EltSize = @@ -1500,10 +1525,12 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr); if (Op.getValueType().isVector()) - return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0); + return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0, + false, false, 0); else return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, - NULL, 0, Vec.getValueType().getVectorElementType()); + NULL, 0, Vec.getValueType().getVectorElementType(), + false, false, 0); } SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { @@ -1533,12 +1560,14 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx); // If EltVT smaller than OpVT, only store the bits necessary. - if (EltVT.bitsLT(OpVT)) + if (!OpVT.isVector() && EltVT.bitsLT(OpVT)) { Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl, - Node->getOperand(i), Idx, SV, Offset, EltVT)); - else + Node->getOperand(i), Idx, SV, Offset, + EltVT, false, false, 0)); + } else Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, - Node->getOperand(i), Idx, SV, Offset)); + Node->getOperand(i), Idx, SV, Offset, + false, false, 0)); } SDValue StoreChain; @@ -1549,7 +1578,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { StoreChain = DAG.getEntryNode(); // Result is a load from the stack slot. 
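// [Editor's sketch] ExpandExtractFromVectorThroughStack below: with no
// native extract, the vector is spilled to a stack temporary and the one
// element is loaded back from base + Idx * eltSize:
#include <cstring>
inline void extractEltViaStack(const void *Vec, unsigned VecBytes,
                               unsigned EltBytes, unsigned Idx, void *EltOut,
                               void *StackSlot) {
  std::memcpy(StackSlot, Vec, VecBytes); // store the whole vector
  std::memcpy(EltOut,
              static_cast<const char *>(StackSlot) + Idx * EltBytes,
              EltBytes);                 // load just the element
}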
- return DAG.getLoad(VT, dl, StoreChain, FIPtr, SV, 0); + return DAG.getLoad(VT, dl, StoreChain, FIPtr, SV, 0, false, false, 0); } SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { @@ -1572,12 +1601,14 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { SDValue StackPtr = DAG.CreateStackTemporary(Tmp2.getValueType()); SDValue StorePtr = StackPtr, LoadPtr = StackPtr; SDValue Ch = - DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StorePtr, NULL, 0); + DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StorePtr, NULL, 0, + false, false, 0); if (Tmp2.getValueType() == MVT::f64 && TLI.isLittleEndian()) LoadPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), LoadPtr, DAG.getIntPtrConstant(4)); SignBit = DAG.getExtLoad(ISD::SEXTLOAD, dl, TLI.getPointerTy(), - Ch, LoadPtr, NULL, 0, MVT::i32); + Ch, LoadPtr, NULL, 0, MVT::i32, + false, false, 0); } SignBit = DAG.getSetCC(dl, TLI.getSetCCResultType(SignBit.getValueType()), @@ -1701,20 +1732,21 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, if (SrcSize > SlotSize) Store = DAG.getTruncStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, - SV, 0, SlotVT, false, SrcAlign); + SV, 0, SlotVT, false, false, SrcAlign); else { assert(SrcSize == SlotSize && "Invalid store"); Store = DAG.getStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, - SV, 0, false, SrcAlign); + SV, 0, false, false, SrcAlign); } // Result is a load from the stack slot. if (SlotSize == DestSize) - return DAG.getLoad(DestVT, dl, Store, FIPtr, SV, 0, false, DestAlign); + return DAG.getLoad(DestVT, dl, Store, FIPtr, SV, 0, false, false, + DestAlign); assert(SlotSize < DestSize && "Unknown extension!"); return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, SV, 0, SlotVT, - false, DestAlign); + false, false, DestAlign); } SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) { @@ -1729,9 +1761,11 @@ SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) { SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), dl, Node->getOperand(0), StackPtr, PseudoSourceValue::getFixedStack(SPFI), 0, - Node->getValueType(0).getVectorElementType()); + Node->getValueType(0).getVectorElementType(), + false, false, 0); return DAG.getLoad(Node->getValueType(0), dl, Ch, StackPtr, - PseudoSourceValue::getFixedStack(SPFI), 0); + PseudoSourceValue::getFixedStack(SPFI), 0, + false, false, 0); } @@ -1805,7 +1839,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, PseudoSourceValue::getConstantPool(), 0, - false, Alignment); + false, false, Alignment); } if (!MoreThanTwoValues) { @@ -1943,13 +1977,16 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, } // store the lo of the constructed double - based on integer input SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, - Op0Mapped, Lo, NULL, 0); + Op0Mapped, Lo, NULL, 0, + false, false, 0); // initial hi portion of constructed double SDValue InitialHi = DAG.getConstant(0x43300000u, MVT::i32); // store the hi of the constructed double - biased exponent - SDValue Store2=DAG.getStore(Store1, dl, InitialHi, Hi, NULL, 0); + SDValue Store2=DAG.getStore(Store1, dl, InitialHi, Hi, NULL, 0, + false, false, 0); // load the constructed double - SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot, NULL, 0); + SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot, NULL, 0, + false, false, 0); // FP constant to bias correct the final result SDValue Bias = 
DAG.getConstantFP(isSigned ? BitsToDouble(0x4330000080000000ULL) : @@ -2004,13 +2041,13 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, if (DestVT == MVT::f32) FudgeInReg = DAG.getLoad(MVT::f32, dl, DAG.getEntryNode(), CPIdx, PseudoSourceValue::getConstantPool(), 0, - false, Alignment); + false, false, Alignment); else { FudgeInReg = LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, DAG.getEntryNode(), CPIdx, PseudoSourceValue::getConstantPool(), 0, - MVT::f32, false, Alignment)); + MVT::f32, false, false, Alignment)); } return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg); @@ -2350,16 +2387,19 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, EVT VT = Node->getValueType(0); Tmp1 = Node->getOperand(0); Tmp2 = Node->getOperand(1); - SDValue VAList = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, V, 0); + SDValue VAList = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, V, 0, + false, false, 0); // Increment the pointer, VAList, to the next vaarg Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList, DAG.getConstant(TLI.getTargetData()-> getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())), TLI.getPointerTy())); // Store the incremented VAList to the legalized pointer - Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Tmp2, V, 0); + Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Tmp2, V, 0, + false, false, 0); // Load the actual argument out of the pointer VAList - Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0)); + Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0, + false, false, 0)); Results.push_back(Results[0].getValue(1)); break; } @@ -2369,8 +2409,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue(); const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue(); Tmp1 = DAG.getLoad(TLI.getPointerTy(), dl, Node->getOperand(0), - Node->getOperand(2), VS, 0); - Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), VD, 0); + Node->getOperand(2), VS, 0, false, false, 0); + Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), VD, 0, + false, false, 0); Results.push_back(Tmp1); break; } @@ -2767,7 +2808,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, DAG.getIntPtrConstant(1)); } else { // FIXME: We should be able to fall back to a libcall with an illegal - // type in some cases cases. + // type in some cases. // Also, we can fall back to a division in some cases, but that's a big // performance hit in the general case. 
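// [Editor's sketch] The double-construction trick completed above: writing
// a 32-bit integer X into the low word of a double whose high word is
// 0x43300000 yields the value 2^52 + X exactly; subtracting the bias 2^52
// recovers X as a double. (The signed variant above flips the input's sign
// bit and uses bias 2^52 + 2^31, i.e. 0x4330000080000000.) Unsigned case,
// little-endian host assumed:
#include <cstdint>
#include <cstring>
inline double uint32ToDouble(uint32_t X) {
  uint64_t Bits = (uint64_t(0x43300000) << 32) | X; // hi = exponent word
  double D, Bias;
  std::memcpy(&D, &Bits, 8);                 // "load the constructed double"
  uint64_t BiasBits = 0x4330000000000000ULL; // 2^52 as a double
  std::memcpy(&Bias, &BiasBits, 8);
  return D - Bias;                           // FSUB strips the implicit 2^52
}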
llvm_unreachable("Don't know how to expand this operation yet!"); @@ -2816,15 +2857,19 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, SDValue Index = Node->getOperand(2); EVT PTy = TLI.getPointerTy(); - MachineFunction &MF = DAG.getMachineFunction(); - unsigned EntrySize = MF.getJumpTableInfo()->getEntrySize(); - Index= DAG.getNode(ISD::MUL, dl, PTy, + + const TargetData &TD = *TLI.getTargetData(); + unsigned EntrySize = + DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD); + + Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(EntrySize, PTy)); SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8); SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr, - PseudoSourceValue::getJumpTable(), 0, MemVT); + PseudoSourceValue::getJumpTable(), 0, MemVT, + false, false, 0); Addr = LD; if (TLI.getTargetMachine().getRelocationModel() == Reloc::PIC_) { // For PIC, the sequence is: diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 4f0fce7..35a7c7c 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -444,7 +444,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { NewL = DAG.getLoad(L->getAddressingMode(), dl, L->getExtensionType(), NVT, L->getChain(), L->getBasePtr(), L->getOffset(), L->getSrcValue(), L->getSrcValueOffset(), NVT, - L->isVolatile(), L->getAlignment()); + L->isVolatile(), L->isNonTemporal(), L->getAlignment()); // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); @@ -456,8 +456,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { L->getMemoryVT(), L->getChain(), L->getBasePtr(), L->getOffset(), L->getSrcValue(), L->getSrcValueOffset(), - L->getMemoryVT(), - L->isVolatile(), L->getAlignment()); + L->getMemoryVT(), L->isVolatile(), + L->isNonTemporal(), L->getAlignment()); // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); @@ -755,7 +755,8 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) { return DAG.getStore(ST->getChain(), dl, Val, ST->getBasePtr(), ST->getSrcValue(), ST->getSrcValueOffset(), - ST->isVolatile(), ST->getAlignment()); + ST->isVolatile(), ST->isNonTemporal(), + ST->getAlignment()); } @@ -1073,8 +1074,8 @@ void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo, Hi = DAG.getExtLoad(LD->getExtensionType(), dl, NVT, Chain, Ptr, LD->getSrcValue(), LD->getSrcValueOffset(), - LD->getMemoryVT(), - LD->isVolatile(), LD->getAlignment()); + LD->getMemoryVT(), LD->isVolatile(), + LD->isNonTemporal(), LD->getAlignment()); // Remember the chain. 
Chain = Hi.getValue(1); @@ -1382,6 +1383,6 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) { return DAG.getTruncStore(Chain, N->getDebugLoc(), Hi, Ptr, ST->getSrcValue(), ST->getSrcValueOffset(), - ST->getMemoryVT(), - ST->isVolatile(), ST->getAlignment()); + ST->getMemoryVT(), ST->isVolatile(), + ST->isNonTemporal(), ST->getAlignment()); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 9932cf4..e4d123f 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -359,7 +359,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) { SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(), N->getSrcValue(), N->getSrcValueOffset(), N->getMemoryVT(), N->isVolatile(), - N->getAlignment()); + N->isNonTemporal(), N->getAlignment()); // Legalized the chain result - switch anything that used the old chain to // use the new one. @@ -873,6 +873,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){ int SVOffset = N->getSrcValueOffset(); unsigned Alignment = N->getAlignment(); bool isVolatile = N->isVolatile(); + bool isNonTemporal = N->isNonTemporal(); DebugLoc dl = N->getDebugLoc(); SDValue Val = GetPromotedInteger(N->getValue()); // Get promoted value. @@ -880,7 +881,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){ // Truncate the value and store the result. return DAG.getTruncStore(Ch, dl, Val, Ptr, N->getSrcValue(), SVOffset, N->getMemoryVT(), - isVolatile, Alignment); + isVolatile, isNonTemporal, Alignment); } SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) { @@ -1500,6 +1501,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, int SVOffset = N->getSrcValueOffset(); unsigned Alignment = N->getAlignment(); bool isVolatile = N->isVolatile(); + bool isNonTemporal = N->isNonTemporal(); DebugLoc dl = N->getDebugLoc(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); @@ -1508,7 +1510,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, EVT MemVT = N->getMemoryVT(); Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset, - MemVT, isVolatile, Alignment); + MemVT, isVolatile, isNonTemporal, Alignment); // Remember the chain. Ch = Lo.getValue(1); @@ -1530,7 +1532,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, } else if (TLI.isLittleEndian()) { // Little-endian - low bits are at low addresses. Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset, - isVolatile, Alignment); + isVolatile, isNonTemporal, Alignment); unsigned ExcessBits = N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); @@ -1542,7 +1544,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, DAG.getIntPtrConstant(IncrementSize)); Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset+IncrementSize, NEVT, - isVolatile, MinAlign(Alignment, IncrementSize)); + isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); // Build a factor node to remember that this load is independent of the // other one. @@ -1560,7 +1563,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset, EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits() - ExcessBits), - isVolatile, Alignment); + isVolatile, isNonTemporal, Alignment); // Increment the pointer to the other half. 
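// [Editor's sketch] ExpandIntRes_LOAD's little-endian path above: a load
// wider than the largest legal integer type splits into a Lo load at the
// original address and a Hi (possibly extending) load at base +
// IncrementSize, later reassembled as a pair. Model for a 128-bit value on
// a 64-bit target:
#include <cstdint>
#include <cstring>
inline void expandLoadLE(const unsigned char *P, uint64_t &Lo, uint64_t &Hi) {
  std::memcpy(&Lo, P, 8);     // low half at the original pointer
  std::memcpy(&Hi, P + 8, 8); // high half at base + IncrementSize
}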
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, @@ -1569,7 +1572,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset+IncrementSize, EVT::getIntegerVT(*DAG.getContext(), ExcessBits), - isVolatile, MinAlign(Alignment, IncrementSize)); + isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); // Build a factor node to remember that this load is independent of the // other one. @@ -2212,6 +2216,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { int SVOffset = N->getSrcValueOffset(); unsigned Alignment = N->getAlignment(); bool isVolatile = N->isVolatile(); + bool isNonTemporal = N->isNonTemporal(); DebugLoc dl = N->getDebugLoc(); SDValue Lo, Hi; @@ -2220,13 +2225,14 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { if (N->getMemoryVT().bitsLE(NVT)) { GetExpandedInteger(N->getValue(), Lo, Hi); return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset, - N->getMemoryVT(), isVolatile, Alignment); + N->getMemoryVT(), isVolatile, isNonTemporal, + Alignment); } else if (TLI.isLittleEndian()) { // Little-endian - low bits are at low addresses. GetExpandedInteger(N->getValue(), Lo, Hi); Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset, - isVolatile, Alignment); + isVolatile, isNonTemporal, Alignment); unsigned ExcessBits = N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); @@ -2238,7 +2244,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { DAG.getIntPtrConstant(IncrementSize)); Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset+IncrementSize, NEVT, - isVolatile, MinAlign(Alignment, IncrementSize)); + isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } else { // Big-endian - high bits are at low addresses. Favor aligned stores at @@ -2264,7 +2271,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { // Store both the high bits and maybe some of the low bits. Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(), - SVOffset, HiVT, isVolatile, Alignment); + SVOffset, HiVT, isVolatile, isNonTemporal, + Alignment); // Increment the pointer to the other half. Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, @@ -2273,7 +2281,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset+IncrementSize, EVT::getIntegerVT(*DAG.getContext(), ExcessBits), - isVolatile, MinAlign(Alignment, IncrementSize)); + isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } } @@ -2341,7 +2350,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { // FIXME: Avoid the extend by constructing the right constant pool? SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(), FudgePtr, NULL, 0, MVT::f32, - false, Alignment); + false, false, Alignment); return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 37f36a3..0d929f1 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -871,9 +871,10 @@ SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op, // the source and destination types. 
SDValue StackPtr = DAG.CreateStackTemporary(Op.getValueType(), DestVT); // Emit a store to the stack slot. - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, NULL, 0); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, NULL, 0, + false, false, 0); // Result is a load from the stack slot. - return DAG.getLoad(DestVT, dl, Store, StackPtr, NULL, 0); + return DAG.getLoad(DestVT, dl, Store, StackPtr, NULL, 0, false, false, 0); } /// CustomLowerNode - Replace the node's results with custom code provided diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index b5dbd41..b0af357 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -609,6 +609,7 @@ private: SDValue WidenVecRes_SIGN_EXTEND_INREG(SDNode* N); SDValue WidenVecRes_SELECT(SDNode* N); SDValue WidenVecRes_SELECT_CC(SDNode* N); + SDValue WidenVecRes_SETCC(SDNode* N); SDValue WidenVecRes_UNDEF(SDNode *N); SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N); SDValue WidenVecRes_VSETCC(SDNode* N); @@ -633,43 +634,33 @@ private: // Vector Widening Utilities Support: LegalizeVectorTypes.cpp //===--------------------------------------------------------------------===// - /// Helper genWidenVectorLoads - Helper function to generate a set of + /// Helper GenWidenVectorLoads - Helper function to generate a set of /// loads to load a vector with a resulting wider type. It takes - /// ExtType: Extension type - /// LdChain: list of chains for the load we have generated. - /// Chain: incoming chain for the ld vector. - /// BasePtr: base pointer to load from. - /// SV: memory disambiguation source value. - /// SVOffset: memory disambiugation offset. - /// Alignment: alignment of the memory. - /// isVolatile: volatile load. - /// LdWidth: width of memory that we want to load. - /// ResType: the wider result result type for the resulting vector. - /// dl: DebugLoc to be applied to new nodes - SDValue GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, SDValue Chain, - SDValue BasePtr, const Value *SV, - int SVOffset, unsigned Alignment, - bool isVolatile, unsigned LdWidth, - EVT ResType, DebugLoc dl); + /// LdChain: list of chains for the load to be generated. + /// Ld: load to widen + SDValue GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, + LoadSDNode *LD); + + /// GenWidenVectorExtLoads - Helper function to generate a set of extension + /// loads to load a vector with a resulting wider type. It takes + /// LdChain: list of chains for the load to be generated.
+ /// Ld: load to widen + /// ExtType: extension element type + SDValue GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain, + LoadSDNode *LD, ISD::LoadExtType ExtType); /// Helper genWidenVectorStores - Helper function to generate a set of /// stores to store a widen vector into non widen memory - /// It takes /// StChain: list of chains for the stores we have generated - /// Chain: incoming chain for the ld vector - /// BasePtr: base pointer to load from - /// SV: memory disambiguation source value - /// SVOffset: memory disambiugation offset - /// Alignment: alignment of the memory - /// isVolatile: volatile lod - /// ValOp: value to store - /// StWidth: width of memory that we want to store - /// dl: DebugLoc to be applied to new nodes - void GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, SDValue Chain, - SDValue BasePtr, const Value *SV, - int SVOffset, unsigned Alignment, - bool isVolatile, SDValue ValOp, - unsigned StWidth, DebugLoc dl); + /// ST: store of a widened value + void GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, StoreSDNode *ST); + + /// Helper genWidenVectorTruncStores - Helper function to generate a set of + /// stores to store a truncated widened vector into non-widened memory + /// StChain: list of chains for the stores we have generated + /// ST: store of a widened value + void GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain, + StoreSDNode *ST); /// Modifies a vector input (widen or narrows) to a vector of NVT. The /// input vector must have the same element type as NVT. diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index a1b6ced..5e83b4b 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -122,10 +122,11 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, const Value *SV = PseudoSourceValue::getFixedStack(SPFI); // Emit a store to the stack slot. - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, SV, 0); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, SV, 0, + false, false, 0); // Load the first half from the stack slot. - Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, 0); + Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, 0, false, false, 0); // Increment the pointer to the other half. unsigned IncrementSize = NOutVT.getSizeInBits() / 8; @@ -134,7 +135,7 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, // Load the second half from the stack slot. Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, IncrementSize, false, - MinAlign(Alignment, IncrementSize)); + false, MinAlign(Alignment, IncrementSize)); // Handle endianness of the load. if (TLI.isBigEndian()) @@ -205,11 +206,12 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, int SVOffset = LD->getSrcValueOffset(); unsigned Alignment = LD->getAlignment(); bool isVolatile = LD->isVolatile(); + bool isNonTemporal = LD->isNonTemporal(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset, - isVolatile, Alignment); + isVolatile, isNonTemporal, Alignment); // Increment the pointer to the other half.
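The LegalizeTypes.h hunk above also shows the shape of the companion refactor: the widening helpers now take the memory node itself instead of a long list of loose parameters, since a LoadSDNode or StoreSDNode already carries the chain, base pointer, source value, offset, alignment, volatility, and the new non-temporal flag. Both declarations are taken from the hunk:

    // Old: the caller had to unpack every detail of the load.
    SDValue GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, SDValue Chain,
                                SDValue BasePtr, const Value *SV,
                                int SVOffset, unsigned Alignment,
                                bool isVolatile, unsigned LdWidth,
                                EVT ResType, DebugLoc dl);
    // New: the node carries all of that state itself.
    SDValue GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
                                LoadSDNode *LD);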
unsigned IncrementSize = NVT.getSizeInBits() / 8; @@ -217,7 +219,8 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, DAG.getIntPtrConstant(IncrementSize)); Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset+IncrementSize, - isVolatile, MinAlign(Alignment, IncrementSize)); + isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); // Build a factor node to remember that this load is independent of the // other one. @@ -383,6 +386,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { int SVOffset = St->getSrcValueOffset(); unsigned Alignment = St->getAlignment(); bool isVolatile = St->isVolatile(); + bool isNonTemporal = St->isNonTemporal(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); unsigned IncrementSize = NVT.getSizeInBits() / 8; @@ -394,14 +398,15 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { std::swap(Lo, Hi); Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getSrcValue(), SVOffset, - isVolatile, Alignment); + isVolatile, isNonTemporal, Alignment); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); assert(isTypeLegal(Ptr.getValueType()) && "Pointers must be legal!"); Hi = DAG.getStore(Chain, dl, Hi, Ptr, St->getSrcValue(), SVOffset + IncrementSize, - isVolatile, MinAlign(Alignment, IncrementSize)); + isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 808bac7..8363c3a 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -172,7 +172,8 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { DAG.getUNDEF(N->getBasePtr().getValueType()), N->getSrcValue(), N->getSrcValueOffset(), N->getMemoryVT().getVectorElementType(), - N->isVolatile(), N->getOriginalAlignment()); + N->isVolatile(), N->isNonTemporal(), + N->getOriginalAlignment()); // Legalized the chain result - switch anything that used the old chain to // use the new one. @@ -366,11 +367,13 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ N->getBasePtr(), N->getSrcValue(), N->getSrcValueOffset(), N->getMemoryVT().getVectorElementType(), - N->isVolatile(), N->getAlignment()); + N->isVolatile(), N->isNonTemporal(), + N->getAlignment()); return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)), N->getBasePtr(), N->getSrcValue(), N->getSrcValueOffset(), - N->isVolatile(), N->getOriginalAlignment()); + N->isVolatile(), N->isNonTemporal(), + N->getOriginalAlignment()); } @@ -696,17 +699,20 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, EVT VecVT = Vec.getValueType(); EVT EltVT = VecVT.getVectorElementType(); SDValue StackPtr = DAG.CreateStackTemporary(VecVT); - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0, + false, false, 0); // Store the new element. This may be larger than the vector element type, // so use a truncating store. 
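When one access is expanded into two, only the half at the original address keeps the original alignment; the half at BasePtr + IncrementSize may only assume whatever both the base alignment and the offset guarantee, which is what MinAlign computes. A worked case (my numbers, not from the patch): an i64 load at 8-byte alignment split into two i32 halves leaves the high half with MinAlign(8, 4) == 4.

    // Low half: original alignment. High half: alignment implied by the offset.
    Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset,
                     isVolatile, isNonTemporal, Alignment);
    Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
                      DAG.getIntPtrConstant(IncrementSize));
    Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(),
                     SVOffset + IncrementSize, isVolatile, isNonTemporal,
                     MinAlign(Alignment, IncrementSize));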
SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); unsigned Alignment = TLI.getTargetData()->getPrefTypeAlignment(VecVT.getTypeForEVT(*DAG.getContext())); - Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, NULL, 0, EltVT); + Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, NULL, 0, EltVT, + false, false, 0); // Load the Lo part from the stack slot. - Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, NULL, 0); + Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, NULL, 0, + false, false, 0); // Increment the pointer to the other part. unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8; @@ -715,7 +721,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, // Load the Hi part from the stack slot. Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, NULL, 0, false, - MinAlign(Alignment, IncrementSize)); + false, MinAlign(Alignment, IncrementSize)); } void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, @@ -743,19 +749,20 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, EVT MemoryVT = LD->getMemoryVT(); unsigned Alignment = LD->getOriginalAlignment(); bool isVolatile = LD->isVolatile(); + bool isNonTemporal = LD->isNonTemporal(); EVT LoMemVT, HiMemVT; GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT); Lo = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, LoVT, Ch, Ptr, Offset, - SV, SVOffset, LoMemVT, isVolatile, Alignment); + SV, SVOffset, LoMemVT, isVolatile, isNonTemporal, Alignment); unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); SVOffset += IncrementSize; Hi = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, HiVT, Ch, Ptr, Offset, - SV, SVOffset, HiMemVT, isVolatile, Alignment); + SV, SVOffset, HiMemVT, isVolatile, isNonTemporal, Alignment); // Build a factor node to remember that this load is independent of the // other one. @@ -1086,12 +1093,13 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { SDValue StackPtr = DAG.CreateStackTemporary(VecVT); int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); const Value *SV = PseudoSourceValue::getFixedStack(SPFI); - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, SV, 0); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, SV, 0, + false, false, 0); // Load back the required element. StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr, - SV, 0, EltVT); + SV, 0, EltVT, false, false, 0); } SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { @@ -1106,6 +1114,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { EVT MemoryVT = N->getMemoryVT(); unsigned Alignment = N->getOriginalAlignment(); bool isVol = N->isVolatile(); + bool isNT = N->isNonTemporal(); SDValue Lo, Hi; GetSplitVector(N->getOperand(1), Lo, Hi); @@ -1116,10 +1125,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { if (isTruncating) Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset, - LoMemVT, isVol, Alignment); + LoMemVT, isVol, isNT, Alignment); else Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset, - isVol, Alignment); + isVol, isNT, Alignment); // Increment the pointer to the other half. 
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, @@ -1128,10 +1137,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { if (isTruncating) Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset, - HiMemVT, isVol, Alignment); + HiMemVT, isVol, isNT, Alignment); else Hi = DAG.getStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset, - isVol, Alignment); + isVol, isNT, Alignment); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -1172,6 +1181,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break; case ISD::SELECT: Res = WidenVecRes_SELECT(N); break; case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break; + case ISD::SETCC: Res = WidenVecRes_SETCC(N); break; case ISD::UNDEF: Res = WidenVecRes_UNDEF(N); break; case ISD::VECTOR_SHUFFLE: Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N)); @@ -1241,10 +1251,96 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { // Binary op widening. + unsigned Opcode = N->getOpcode(); + DebugLoc dl = N->getDebugLoc(); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue InOp1 = GetWidenedVector(N->getOperand(0)); - SDValue InOp2 = GetWidenedVector(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp1, InOp2); + EVT WidenEltVT = WidenVT.getVectorElementType(); + EVT VT = WidenVT; + unsigned NumElts = VT.getVectorNumElements(); + while (!TLI.isTypeLegal(VT) && NumElts != 1) { + NumElts = NumElts / 2; + VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); + } + + if (NumElts != 1 && !TLI.canOpTrap(N->getOpcode(), VT)) { + // Operation doesn't trap so just widen as normal. + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2); + } else if (NumElts == 1) { + // No legal vector version so unroll the vector operation and then widen. + return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()); + } else { + // Since the operation can trap, apply operation on the original vector. + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + unsigned CurNumElts = N->getValueType(0).getVectorNumElements(); + + SmallVector<SDValue, 16> ConcatOps(CurNumElts); + unsigned ConcatEnd = 0; // Current ConcatOps index. + unsigned Idx = 0; // Current Idx into input vectors. + while (CurNumElts != 0) { + while (CurNumElts >= NumElts) { + SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1, + DAG.getIntPtrConstant(Idx)); + SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2, + DAG.getIntPtrConstant(Idx)); + ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2); + Idx += NumElts; + CurNumElts -= NumElts; + } + EVT PrevVecVT = VT; + do { + NumElts = NumElts / 2; + VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); + } while (!TLI.isTypeLegal(VT) && NumElts != 1); + + if (NumElts == 1) { + // Since we are using concat vector, build a vector from the scalar ops. 
+ SDValue VecOp = DAG.getUNDEF(PrevVecVT); + for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) { + SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, + InOp1, DAG.getIntPtrConstant(Idx)); + SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, + InOp2, DAG.getIntPtrConstant(Idx)); + VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, PrevVecVT, VecOp, + DAG.getNode(Opcode, dl, WidenEltVT, EOp1, EOp2), + DAG.getIntPtrConstant(i)); + } + CurNumElts = 0; + ConcatOps[ConcatEnd++] = VecOp; + } + } + + // Check to see if we have a single operation with the widen type. + if (ConcatEnd == 1) { + VT = ConcatOps[0].getValueType(); + if (VT == WidenVT) + return ConcatOps[0]; + } + + // Rebuild vector to one with the widen type + Idx = ConcatEnd - 1; + while (Idx != 0) { + VT = ConcatOps[Idx--].getValueType(); + while (Idx != 0 && ConcatOps[Idx].getValueType() == VT) + --Idx; + if (Idx != 0) { + VT = ConcatOps[Idx].getValueType(); + ConcatOps[Idx+1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, + &ConcatOps[Idx+1], ConcatEnd - Idx - 1); + ConcatEnd = Idx + 2; + } + } + + unsigned NumOps = WidenVT.getVectorNumElements()/VT.getVectorNumElements(); + if (NumOps != ConcatEnd ) { + SDValue UndefVal = DAG.getUNDEF(VT); + for (unsigned j = ConcatEnd; j < NumOps; ++j) + ConcatOps[j] = UndefVal; + } + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0], NumOps); + } } SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { @@ -1655,68 +1751,24 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) { SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) { LoadSDNode *LD = cast<LoadSDNode>(N); - EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0)); - EVT LdVT = LD->getMemoryVT(); - DebugLoc dl = N->getDebugLoc(); - assert(LdVT.isVector() && WidenVT.isVector()); - - // Load information - SDValue Chain = LD->getChain(); - SDValue BasePtr = LD->getBasePtr(); - int SVOffset = LD->getSrcValueOffset(); - unsigned Align = LD->getAlignment(); - bool isVolatile = LD->isVolatile(); - const Value *SV = LD->getSrcValue(); ISD::LoadExtType ExtType = LD->getExtensionType(); SDValue Result; SmallVector<SDValue, 16> LdChain; // Chain for the series of load - if (ExtType != ISD::NON_EXTLOAD) { - // For extension loads, we can not play the tricks of chopping legal - // vector types and bit cast it to the right type. Instead, we unroll - // the load and build a vector. 
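The WidenVecRes_Binary rewrite above exists because plain widening is unsound for opcodes that can trap: the extra lanes introduced by widening are undef, and an integer division in a padded lane could fault. A hypothetical trace (my numbers, assuming the widened type is v4i32 and v2i32 is the largest legal narrower vector):

    // widen (udiv v3i32 %a, %b) -- lane 3 would be undef after widening
    // VT = v2i32, NumElts = 2, CurNumElts = 3
    //   EXTRACT_SUBVECTOR v2i32 at index 0 from each operand;
    //   ConcatOps[0] = udiv v2i32           // covers lanes 0..1 safely
    // CurNumElts = 1, so fall back to scalars:
    //   lane 2 via EXTRACT_VECTOR_ELT, scalar udiv, INSERT_VECTOR_ELT
    // CONCAT_VECTORS then rebuilds a v4i32, padding the tail with undef.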
- EVT EltVT = WidenVT.getVectorElementType(); - EVT LdEltVT = LdVT.getVectorElementType(); - unsigned NumElts = LdVT.getVectorNumElements(); - - // Load each element and widen - unsigned WidenNumElts = WidenVT.getVectorNumElements(); - SmallVector<SDValue, 16> Ops(WidenNumElts); - unsigned Increment = LdEltVT.getSizeInBits() / 8; - Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, SV, SVOffset, - LdEltVT, isVolatile, Align); - LdChain.push_back(Ops[0].getValue(1)); - unsigned i = 0, Offset = Increment; - for (i=1; i < NumElts; ++i, Offset += Increment) { - SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), - BasePtr, DAG.getIntPtrConstant(Offset)); - Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, SV, - SVOffset + Offset, LdEltVT, isVolatile, Align); - LdChain.push_back(Ops[i].getValue(1)); - } - - // Fill the rest with undefs - SDValue UndefVal = DAG.getUNDEF(EltVT); - for (; i != WidenNumElts; ++i) - Ops[i] = UndefVal; - - Result = DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], Ops.size()); - } else { - assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType()); - unsigned int LdWidth = LdVT.getSizeInBits(); - Result = GenWidenVectorLoads(LdChain, Chain, BasePtr, SV, SVOffset, - Align, isVolatile, LdWidth, WidenVT, dl); - } - - // If we generate a single load, we can use that for the chain. Otherwise, - // build a factor node to remember the multiple loads are independent and - // chain to that. - SDValue NewChain; - if (LdChain.size() == 1) - NewChain = LdChain[0]; - else - NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &LdChain[0], - LdChain.size()); + if (ExtType != ISD::NON_EXTLOAD) + Result = GenWidenVectorExtLoads(LdChain, LD, ExtType); + else + Result = GenWidenVectorLoads(LdChain, LD); + + // If we generate a single load, we can use that for the chain. Otherwise, + // build a factor node to remember the multiple loads are independent and + // chain to that. + SDValue NewChain; + if (LdChain.size() == 1) + NewChain = LdChain[0]; + else + NewChain = DAG.getNode(ISD::TokenFactor, LD->getDebugLoc(), MVT::Other, + &LdChain[0], LdChain.size()); // Modified the chain - switch anything that used the old chain to use // the new one. @@ -1762,6 +1814,14 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) { N->getOperand(1), InOp1, InOp2, N->getOperand(4)); } +SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) { + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + return DAG.getNode(ISD::SETCC, N->getDebugLoc(), WidenVT, + InOp1, InOp2, N->getOperand(2)); +} + SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); return DAG.getUNDEF(WidenVT); @@ -1954,57 +2014,17 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { // We have to widen the value but we want only to store the original // vector type. 
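Both load paths above hand back one chain per load they emit. A single load donates its chain result directly; several independent loads are joined with a TokenFactor so that anything ordered after the widened load is ordered after all of its pieces. The merged tail of WidenVecRes_LOAD, as it reads after the hunk:

    SDValue NewChain;
    if (LdChain.size() == 1)
      NewChain = LdChain[0];  // one load: reuse its chain
    else                      // several loads, none ordered before another
      NewChain = DAG.getNode(ISD::TokenFactor, LD->getDebugLoc(), MVT::Other,
                             &LdChain[0], LdChain.size());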
StoreSDNode *ST = cast<StoreSDNode>(N); - SDValue Chain = ST->getChain(); - SDValue BasePtr = ST->getBasePtr(); - const Value *SV = ST->getSrcValue(); - int SVOffset = ST->getSrcValueOffset(); - unsigned Align = ST->getAlignment(); - bool isVolatile = ST->isVolatile(); - SDValue ValOp = GetWidenedVector(ST->getValue()); - DebugLoc dl = N->getDebugLoc(); - - EVT StVT = ST->getMemoryVT(); - EVT ValVT = ValOp.getValueType(); - // It must be true that we the widen vector type is bigger than where - // we need to store. - assert(StVT.isVector() && ValOp.getValueType().isVector()); - assert(StVT.bitsLT(ValOp.getValueType())); SmallVector<SDValue, 16> StChain; - if (ST->isTruncatingStore()) { - // For truncating stores, we can not play the tricks of chopping legal - // vector types and bit cast it to the right type. Instead, we unroll - // the store. - EVT StEltVT = StVT.getVectorElementType(); - EVT ValEltVT = ValVT.getVectorElementType(); - unsigned Increment = ValEltVT.getSizeInBits() / 8; - unsigned NumElts = StVT.getVectorNumElements(); - SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, - DAG.getIntPtrConstant(0)); - StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, SV, - SVOffset, StEltVT, - isVolatile, Align)); - unsigned Offset = Increment; - for (unsigned i=1; i < NumElts; ++i, Offset += Increment) { - SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), - BasePtr, DAG.getIntPtrConstant(Offset)); - SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, - DAG.getIntPtrConstant(0)); - StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, SV, - SVOffset + Offset, StEltVT, - isVolatile, MinAlign(Align, Offset))); - } - } - else { - assert(StVT.getVectorElementType() == ValVT.getVectorElementType()); - // Store value - GenWidenVectorStores(StChain, Chain, BasePtr, SV, SVOffset, - Align, isVolatile, ValOp, StVT.getSizeInBits(), dl); - } + if (ST->isTruncatingStore()) + GenWidenVectorTruncStores(StChain, ST); + else + GenWidenVectorStores(StChain, ST); + if (StChain.size() == 1) return StChain[0]; else - return DAG.getNode(ISD::TokenFactor, dl, + return DAG.getNode(ISD::TokenFactor, ST->getDebugLoc(), MVT::Other,&StChain[0],StChain.size()); } @@ -2012,179 +2032,390 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { // Vector Widening Utilities //===----------------------------------------------------------------------===// +// Utility function to find the type to chop up a widened vector for load/store +// TLI: Target lowering used to determine legal types. +// Width: Width left to load/store. +// WidenVT: The widened vector type to load to/store from +// Align: If 0, don't allow use of a wider type +// WidenEx: If Align is not 0, the additional amount we can load/store. + +static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, + unsigned Width, EVT WidenVT, + unsigned Align = 0, unsigned WidenEx = 0) { + EVT WidenEltVT = WidenVT.getVectorElementType(); + unsigned WidenWidth = WidenVT.getSizeInBits(); + unsigned WidenEltWidth = WidenEltVT.getSizeInBits(); + unsigned AlignInBits = Align*8; + + // If we have one element to load/store, return it.
+ EVT RetVT = WidenEltVT; + if (Width == WidenEltWidth) + return RetVT; + + // See if there is a larger legal integer type than the element type to load/store + unsigned VT; + for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE; + VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) { + EVT MemVT((MVT::SimpleValueType) VT); + unsigned MemVTWidth = MemVT.getSizeInBits(); + if (MemVT.getSizeInBits() <= WidenEltWidth) + break; + if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 && + (MemVTWidth <= Width || + (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { + RetVT = MemVT; + break; + } + } -// Utility function to find a vector type and its associated element -// type from a preferred width and whose vector type must be the same size -// as the VecVT. -// TLI: Target lowering used to determine legal types. -// Width: Preferred width to store. -// VecVT: Vector value type whose size we must match. -// Returns NewVecVT and NewEltVT - the vector type and its associated -// element type. -static void FindAssocWidenVecType(SelectionDAG& DAG, - const TargetLowering &TLI, unsigned Width, - EVT VecVT, - EVT& NewEltVT, EVT& NewVecVT) { - unsigned EltWidth = Width + 1; - if (TLI.isTypeLegal(VecVT)) { - // We start with the preferred with, making it a power of 2 and find a - // legal vector type of that width. If not, we reduce it by another of 2. - // For incoming type is legal, this process will end as a vector of the - // smallest loadable type should always be legal. - do { - assert(EltWidth > 0); - EltWidth = 1 << Log2_32(EltWidth - 1); - NewEltVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth); - unsigned NumElts = VecVT.getSizeInBits() / EltWidth; - NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT, NumElts); - } while (!TLI.isTypeLegal(NewVecVT) || - VecVT.getSizeInBits() != NewVecVT.getSizeInBits()); - } else { - // The incoming vector type is illegal and is the result of widening - // a vector to a power of 2. In this case, we will use the preferred - // with as long as it is a multiple of the incoming vector length. - // The legalization process will eventually make this into a legal type - // and remove the illegal bit converts (which would turn to stack converts - // if they are allow to exist). - do { - assert(EltWidth > 0); - EltWidth = 1 << Log2_32(EltWidth - 1); - NewEltVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth); - unsigned NumElts = VecVT.getSizeInBits() / EltWidth; - NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT, NumElts); - } while (!TLI.isTypeLegal(NewEltVT) || - VecVT.getSizeInBits() != NewVecVT.getSizeInBits()); + // See if there is a larger vector type to load/store that has the same vector + // element type and evenly divides the WidenVT. + for (VT = (unsigned)MVT::LAST_VECTOR_VALUETYPE; + VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) { + EVT MemVT = (MVT::SimpleValueType) VT; + unsigned MemVTWidth = MemVT.getSizeInBits(); + if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() && + (WidenWidth % MemVTWidth) == 0 && + (MemVTWidth <= Width || + (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { + if (RetVT.getSizeInBits() < MemVTWidth || MemVT == WidenVT) + return MemVT; + } } + + return RetVT; +} + +// Builds a vector from scalar loads +// VecTy: Resulting vector type +// LdOps: Load operators to build the vector from +// [Start,End): the range of loads to use.
+static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy, + SmallVector<SDValue, 16>& LdOps, + unsigned Start, unsigned End) { + DebugLoc dl = LdOps[Start].getDebugLoc(); + EVT LdTy = LdOps[Start].getValueType(); + unsigned Width = VecTy.getSizeInBits(); + unsigned NumElts = Width / LdTy.getSizeInBits(); + EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), LdTy, NumElts); + + unsigned Idx = 1; + SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT,LdOps[Start]); + + for (unsigned i = Start + 1; i != End; ++i) { + EVT NewLdTy = LdOps[i].getValueType(); + if (NewLdTy != LdTy) { + NumElts = Width / NewLdTy.getSizeInBits(); + NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewLdTy, NumElts); + VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, VecOp); + // Readjust position and vector position based on new load type + Idx = Idx * LdTy.getSizeInBits() / NewLdTy.getSizeInBits(); + LdTy = NewLdTy; + } + VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i], + DAG.getIntPtrConstant(Idx++)); + } + return DAG.getNode(ISD::BIT_CONVERT, dl, VecTy, VecOp); } SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, - SDValue Chain, - SDValue BasePtr, - const Value *SV, - int SVOffset, - unsigned Alignment, - bool isVolatile, - unsigned LdWidth, - EVT ResType, - DebugLoc dl) { + LoadSDNode * LD) { // The strategy assumes that we can efficiently load powers of two widths. - // The routines chops the vector into the largest power of 2 load and - // can be inserted into a legal vector and then cast the result into the - // vector type we want. This avoids unnecessary stack converts. + // The routine chops the vector into the largest vector loads with the same + // element type, or scalar loads, and then recombines them into the widened + // vector type. + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0)); + unsigned WidenWidth = WidenVT.getSizeInBits(); + EVT LdVT = LD->getMemoryVT(); + DebugLoc dl = LD->getDebugLoc(); + assert(LdVT.isVector() && WidenVT.isVector()); + assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType()); - // TODO: If the Ldwidth is legal, alignment is the same as the LdWidth, and - // the load is nonvolatile, we an use a wider load for the value. + // Load information + SDValue Chain = LD->getChain(); + SDValue BasePtr = LD->getBasePtr(); + int SVOffset = LD->getSrcValueOffset(); + unsigned Align = LD->getAlignment(); + bool isVolatile = LD->isVolatile(); + bool isNonTemporal = LD->isNonTemporal(); + const Value *SV = LD->getSrcValue(); + + int LdWidth = LdVT.getSizeInBits(); + int WidthDiff = WidenWidth - LdWidth; // Difference + unsigned LdAlign = (isVolatile) ? 0 : Align; // Allow wider loads // Find the vector type that can load from.
- EVT NewEltVT, NewVecVT; - unsigned NewEltVTWidth; - FindAssocWidenVecType(DAG, TLI, LdWidth, ResType, NewEltVT, NewVecVT); - NewEltVTWidth = NewEltVT.getSizeInBits(); - - SDValue LdOp = DAG.getLoad(NewEltVT, dl, Chain, BasePtr, SV, SVOffset, - isVolatile, Alignment); - SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp); + EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); + int NewVTWidth = NewVT.getSizeInBits(); + SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, SV, SVOffset, + isVolatile, isNonTemporal, Align); LdChain.push_back(LdOp.getValue(1)); // Check if we can load the element with one instruction - if (LdWidth == NewEltVTWidth) { - return DAG.getNode(ISD::BIT_CONVERT, dl, ResType, VecOp); + if (LdWidth <= NewVTWidth) { + if (NewVT.isVector()) { + if (NewVT != WidenVT) { + assert(WidenWidth % NewVTWidth == 0); + unsigned NumConcat = WidenWidth / NewVTWidth; + SmallVector<SDValue, 16> ConcatOps(NumConcat); + SDValue UndefVal = DAG.getUNDEF(NewVT); + ConcatOps[0] = LdOp; + for (unsigned i = 1; i != NumConcat; ++i) + ConcatOps[i] = UndefVal; + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0], + NumConcat); + } else + return LdOp; + } else { + unsigned NumElts = WidenWidth / LdWidth; + EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts); + SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp); + return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, VecOp); + } } - unsigned Idx = 1; - LdWidth -= NewEltVTWidth; + // Load the vector by using multiple loads, from largest vector to scalar + SmallVector<SDValue, 16> LdOps; + LdOps.push_back(LdOp); + + LdWidth -= NewVTWidth; unsigned Offset = 0; while (LdWidth > 0) { - unsigned Increment = NewEltVTWidth / 8; + unsigned Increment = NewVTWidth / 8; Offset += Increment; BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, DAG.getIntPtrConstant(Increment)); - if (LdWidth < NewEltVTWidth) { - // Our current type we are using is too large, use a smaller size by - // using a smaller power of 2 - unsigned oNewEltVTWidth = NewEltVTWidth; - FindAssocWidenVecType(DAG, TLI, LdWidth, ResType, NewEltVT, NewVecVT); - NewEltVTWidth = NewEltVT.getSizeInBits(); - // Readjust position and vector position based on new load type - Idx = Idx * (oNewEltVTWidth/NewEltVTWidth); - VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, VecOp); + if (LdWidth < NewVTWidth) { + // The type we are currently using is too large; find a better size + NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); + NewVTWidth = NewVT.getSizeInBits(); } - SDValue LdOp = DAG.getLoad(NewEltVT, dl, Chain, BasePtr, SV, - SVOffset+Offset, isVolatile, - MinAlign(Alignment, Offset)); + SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, SV, + SVOffset+Offset, isVolatile, + isNonTemporal, MinAlign(Align, Increment)); LdChain.push_back(LdOp.getValue(1)); - VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOp, - DAG.getIntPtrConstant(Idx++)); + LdOps.push_back(LdOp); - LdWidth -= NewEltVTWidth; + LdWidth -= NewVTWidth; } - return DAG.getNode(ISD::BIT_CONVERT, dl, ResType, VecOp); -} + // Build the vector from the load operations + unsigned End = LdOps.size(); + if (LdOps[0].getValueType().isVector()) { + // If the loads include vector types, build the vector using CONCAT_VECTORS. + // All of the vectors used by the loads are a power of 2, and the scalar + // loads can be combined to make a power of 2 vector.
+ SmallVector<SDValue, 16> ConcatOps(End); + int i = End - 1; + int Idx = End; + EVT LdTy = LdOps[i].getValueType(); + // First combine the scalar loads to a vector + if (!LdTy.isVector()) { + for (--i; i >= 0; --i) { + LdTy = LdOps[i].getValueType(); + if (LdTy.isVector()) + break; + } + ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i+1, End); + } + ConcatOps[--Idx] = LdOps[i]; + for (--i; i >= 0; --i) { + EVT NewLdTy = LdOps[i].getValueType(); + if (NewLdTy != LdTy) { + // Create a larger vector + ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy, + &ConcatOps[Idx], End - Idx); + Idx = End - 1; + LdTy = NewLdTy; + } + ConcatOps[--Idx] = LdOps[i]; + } -void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, - SDValue Chain, - SDValue BasePtr, - const Value *SV, - int SVOffset, - unsigned Alignment, - bool isVolatile, - SDValue ValOp, - unsigned StWidth, - DebugLoc dl) { - // Breaks the stores into a series of power of 2 width stores. For any - // width, we convert the vector to the vector of element size that we - // want to store. This avoids requiring a stack convert. - - // Find a width of the element type we can store with - EVT WidenVT = ValOp.getValueType(); - EVT NewEltVT, NewVecVT; - - FindAssocWidenVecType(DAG, TLI, StWidth, WidenVT, NewEltVT, NewVecVT); - unsigned NewEltVTWidth = NewEltVT.getSizeInBits(); - - SDValue VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, ValOp); - SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewEltVT, VecOp, - DAG.getIntPtrConstant(0)); - SDValue StOp = DAG.getStore(Chain, dl, EOp, BasePtr, SV, SVOffset, - isVolatile, Alignment); - StChain.push_back(StOp); + if (WidenWidth != LdTy.getSizeInBits()*(End - Idx)) { + // We need to fill the rest with undefs to build the vector + unsigned NumOps = WidenWidth / LdTy.getSizeInBits(); + SmallVector<SDValue, 16> WidenOps(NumOps); + SDValue UndefVal = DAG.getUNDEF(LdTy); + unsigned i = 0; + for (; i != End-Idx; ++i) + WidenOps[i] = ConcatOps[Idx+i]; + for (; i != NumOps; ++i) + WidenOps[i] = UndefVal; + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &WidenOps[0],NumOps); + } else + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, + &ConcatOps[Idx], End - Idx); + } else // All the loads are scalar loads. + return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End); +} + +SDValue +DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain, + LoadSDNode * LD, + ISD::LoadExtType ExtType) { + // For extension loads, it may not be more efficient to chop up the vector + // and then extend it. Instead, we unroll the load and build a new vector.
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0)); + EVT LdVT = LD->getMemoryVT(); + DebugLoc dl = LD->getDebugLoc(); + assert(LdVT.isVector() && WidenVT.isVector()); - // Load information + SDValue Chain = LD->getChain(); + SDValue BasePtr = LD->getBasePtr(); + int SVOffset = LD->getSrcValueOffset(); + unsigned Align = LD->getAlignment(); + bool isVolatile = LD->isVolatile(); + bool isNonTemporal = LD->isNonTemporal(); + const Value *SV = LD->getSrcValue(); + + EVT EltVT = WidenVT.getVectorElementType(); + EVT LdEltVT = LdVT.getVectorElementType(); + unsigned NumElts = LdVT.getVectorNumElements(); + + // Load each element and widen + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + SmallVector<SDValue, 16> Ops(WidenNumElts); + unsigned Increment = LdEltVT.getSizeInBits() / 8; + Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, SV, SVOffset, + LdEltVT, isVolatile, isNonTemporal, Align); + LdChain.push_back(Ops[0].getValue(1)); + unsigned i = 0, Offset = Increment; + for (i=1; i < NumElts; ++i, Offset += Increment) { + SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), + BasePtr, DAG.getIntPtrConstant(Offset)); + Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, SV, + SVOffset + Offset, LdEltVT, isVolatile, + isNonTemporal, Align); + LdChain.push_back(Ops[i].getValue(1)); } - unsigned Idx = 1; - StWidth -= NewEltVTWidth; - unsigned Offset = 0; + // Fill the rest with undefs + SDValue UndefVal = DAG.getUNDEF(EltVT); + for (; i != WidenNumElts; ++i) + Ops[i] = UndefVal; - while (StWidth > 0) { - unsigned Increment = NewEltVTWidth / 8; - Offset += Increment; - BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, - DAG.getIntPtrConstant(Increment)); + return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], Ops.size()); +} - if (StWidth < NewEltVTWidth) { - // Our current type we are using is too large, use a smaller size by - // using a smaller power of 2 - unsigned oNewEltVTWidth = NewEltVTWidth; - FindAssocWidenVecType(DAG, TLI, StWidth, WidenVT, NewEltVT, NewVecVT); - NewEltVTWidth = NewEltVT.getSizeInBits(); - // Readjust position and vector position based on new load type - Idx = Idx * (oNewEltVTWidth/NewEltVTWidth); - VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, VecOp); - } - EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewEltVT, VecOp, +void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, + StoreSDNode *ST) { + // The strategy assumes that we can efficiently store powers of two widths. + // The routine chops the vector into the largest vector stores with the same + // element type, or scalar stores.
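A hypothetical trace of the chopping loop that follows (my numbers, assuming i64 and i32 are legal scalar types): storing v3i32 (StWidth = 96) from a value widened to v4i32.

    // FindMemType(96, v4i32) -> i64 (the largest type fitting in 96 bits):
    //   bitcast the v4i32 value to v2i64, EXTRACT_VECTOR_ELT index 0,
    //   8-byte store; StWidth 96 -> 32, Offset 0 -> 8, Idx restored to 2
    // FindMemType(32, v4i32) -> i32:
    //   bitcast to v4i32 (a no-op here), EXTRACT_VECTOR_ELT index 2,
    //   4-byte store; StWidth 32 -> 0 and the loop terminates.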
+ SDValue Chain = ST->getChain(); + SDValue BasePtr = ST->getBasePtr(); + const Value *SV = ST->getSrcValue(); + int SVOffset = ST->getSrcValueOffset(); + unsigned Align = ST->getAlignment(); + bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); + SDValue ValOp = GetWidenedVector(ST->getValue()); + DebugLoc dl = ST->getDebugLoc(); + + EVT StVT = ST->getMemoryVT(); + unsigned StWidth = StVT.getSizeInBits(); + EVT ValVT = ValOp.getValueType(); + unsigned ValWidth = ValVT.getSizeInBits(); + EVT ValEltVT = ValVT.getVectorElementType(); + unsigned ValEltWidth = ValEltVT.getSizeInBits(); + assert(StVT.getVectorElementType() == ValEltVT); + + int Idx = 0; // current index to store + unsigned Offset = 0; // offset from base to store + while (StWidth != 0) { + // Find the largest vector type we can store with + EVT NewVT = FindMemType(DAG, TLI, StWidth, ValVT); + unsigned NewVTWidth = NewVT.getSizeInBits(); + unsigned Increment = NewVTWidth / 8; + if (NewVT.isVector()) { + unsigned NumVTElts = NewVT.getVectorNumElements(); + do { + SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp, + DAG.getIntPtrConstant(Idx)); + StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV, + SVOffset + Offset, isVolatile, + isNonTemporal, + MinAlign(Align, Offset))); + StWidth -= NewVTWidth; + Offset += Increment; + Idx += NumVTElts; + BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, + DAG.getIntPtrConstant(Increment)); + } while (StWidth != 0 && StWidth >= NewVTWidth); + } else { + // Cast the vector to the scalar type we can store + unsigned NumElts = ValWidth / NewVTWidth; + EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts); + SDValue VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, ValOp); + // Readjust index position based on new vector type + Idx = Idx * ValEltWidth / NewVTWidth; + do { + SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp, DAG.getIntPtrConstant(Idx++)); + StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV, + SVOffset + Offset, isVolatile, + isNonTemporal, MinAlign(Align, Offset))); + StWidth -= NewVTWidth; + Offset += Increment; + BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, + DAG.getIntPtrConstant(Increment)); + } while (StWidth != 0 && StWidth >= NewVTWidth); + // Restore index back to be relative to the original widened element type + Idx = Idx * NewVTWidth / ValEltWidth; + } + } +} + +void +DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain, + StoreSDNode *ST) { + // For truncating stores, it may not be more efficient to truncate the vector + // and then store it. Instead, we extract each element and then store it. + SDValue Chain = ST->getChain(); + SDValue BasePtr = ST->getBasePtr(); + const Value *SV = ST->getSrcValue(); + int SVOffset = ST->getSrcValueOffset(); + unsigned Align = ST->getAlignment(); + bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); + SDValue ValOp = GetWidenedVector(ST->getValue()); + DebugLoc dl = ST->getDebugLoc(); + + EVT StVT = ST->getMemoryVT(); + EVT ValVT = ValOp.getValueType(); + + // It must be the case that the widened vector type is bigger than the + // type we need to store.
+ assert(StVT.isVector() && ValOp.getValueType().isVector()); + assert(StVT.bitsLT(ValOp.getValueType())); + + // For truncating stores, we can not play the tricks of chopping legal + // vector types and bit cast it to the right type. Instead, we unroll + // the store. + EVT StEltVT = StVT.getVectorElementType(); + EVT ValEltVT = ValVT.getVectorElementType(); + unsigned Increment = ValEltVT.getSizeInBits() / 8; + unsigned NumElts = StVT.getVectorNumElements(); + SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, + DAG.getIntPtrConstant(0)); + StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, SV, + SVOffset, StEltVT, + isVolatile, isNonTemporal, Align)); + unsigned Offset = Increment; + for (unsigned i=1; i < NumElts; ++i, Offset += Increment) { + SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), + BasePtr, DAG.getIntPtrConstant(Offset)); + SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, + DAG.getIntPtrConstant(0)); + StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, SV, + SVOffset + Offset, StEltVT, + isVolatile, isNonTemporal, + MinAlign(Align, Offset))); } } diff --git a/lib/CodeGen/SelectionDAG/Makefile b/lib/CodeGen/SelectionDAG/Makefile index 4706e68..ea716fd 100644 --- a/lib/CodeGen/SelectionDAG/Makefile +++ b/lib/CodeGen/SelectionDAG/Makefile @@ -9,6 +9,5 @@ LEVEL = ../../.. LIBRARYNAME = LLVMSelectionDAG -CXXFLAGS = -fno-rtti include $(LEVEL)/Makefile.common diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index dea5993..3f1766d 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -345,6 +345,15 @@ void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle, ++NumBacktracks; } +static bool isOperandOf(const SUnit *SU, SDNode *N) { + for (const SDNode *SUNode = SU->getNode(); SUNode; + SUNode = SUNode->getFlaggedNode()) { + if (SUNode->isOperandOf(N)) + return true; + } + return false; +} + /// CopyAndMoveSuccessors - Clone the specified node and move its scheduled /// successors to the newly created node. SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { @@ -427,8 +436,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { I != E; ++I) { if (I->isCtrl()) ChainPreds.push_back(*I); - else if (I->getSUnit()->getNode() && - I->getSUnit()->getNode()->isOperandOf(LoadNode)) + else if (isOperandOf(I->getSUnit(), LoadNode)) LoadPreds.push_back(*I); else NodePreds.push_back(*I); @@ -1034,9 +1042,9 @@ namespace { // CopyToReg should be close to its uses to facilitate coalescing and // avoid spilling. return 0; - if (Opc == TargetInstrInfo::EXTRACT_SUBREG || - Opc == TargetInstrInfo::SUBREG_TO_REG || - Opc == TargetInstrInfo::INSERT_SUBREG) + if (Opc == TargetOpcode::EXTRACT_SUBREG || + Opc == TargetOpcode::SUBREG_TO_REG || + Opc == TargetOpcode::INSERT_SUBREG) // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be // close to their uses to facilitate coalescing. return 0; @@ -1437,7 +1445,7 @@ void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() { while (SuccSU->Succs.size() == 1 && SuccSU->getNode()->isMachineOpcode() && SuccSU->getNode()->getMachineOpcode() == - TargetInstrInfo::COPY_TO_REGCLASS) + TargetOpcode::COPY_TO_REGCLASS) SuccSU = SuccSU->Succs.front().getSUnit(); // Don't constrain non-instruction nodes. 
if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode()) @@ -1451,9 +1459,9 @@ void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() { // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG; // these may be coalesced away. We want them close to their uses. unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode(); - if (SuccOpc == TargetInstrInfo::EXTRACT_SUBREG || - SuccOpc == TargetInstrInfo::INSERT_SUBREG || - SuccOpc == TargetInstrInfo::SUBREG_TO_REG) + if (SuccOpc == TargetOpcode::EXTRACT_SUBREG || + SuccOpc == TargetOpcode::INSERT_SUBREG || + SuccOpc == TargetOpcode::SUBREG_TO_REG) continue; if ((!canClobber(SuccSU, DUSU) || (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) || diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index f1b6f1e..43cf37e 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -829,6 +829,7 @@ void SelectionDAG::clear() { EntryNode.UseList = 0; AllNodes.push_back(&EntryNode); Root = getEntryNode(); + delete Ordering; Ordering = new SDNodeOrdering(); } @@ -1925,19 +1926,28 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, } case ISD::SREM: if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { - const APInt &RA = Rem->getAPIntValue(); - if (RA.isPowerOf2() || (-RA).isPowerOf2()) { - APInt LowBits = RA.isStrictlyPositive() ? (RA - 1) : ~RA; + const APInt &RA = Rem->getAPIntValue().abs(); + if (RA.isPowerOf2()) { + APInt LowBits = RA - 1; APInt Mask2 = LowBits | APInt::getSignBit(BitWidth); ComputeMaskedBits(Op.getOperand(0), Mask2,KnownZero2,KnownOne2,Depth+1); - // If the sign bit of the first operand is zero, the sign bit of - // the result is zero. If the first operand has no one bits below - // the second operand's single 1 bit, its sign will be zero. + // The low bits of the first operand are unchanged by the srem. + KnownZero = KnownZero2 & LowBits; + KnownOne = KnownOne2 & LowBits; + + // If the first operand is non-negative or has all low bits zero, then + // the upper bits are all zero. if (KnownZero2[BitWidth-1] || ((KnownZero2 & LowBits) == LowBits)) - KnownZero2 |= ~LowBits; + KnownZero |= ~LowBits; - KnownZero |= KnownZero2 & Mask; + // If the first operand is negative and not all low bits are zero, then + // the upper bits are all one. + if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0)) + KnownOne |= ~LowBits; + + KnownZero &= Mask; + KnownOne &= Mask; assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); } @@ -2755,13 +2765,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, // EXTRACT_VECTOR_ELT of INSERT_VECTOR_ELT is often formed when vector // operations are lowered to scalars. if (N1.getOpcode() == ISD::INSERT_VECTOR_ELT) { - // If the indices are the same, return the inserted element. - if (N1.getOperand(2) == N2) - return N1.getOperand(1); - // If the indices are known different, extract the element from + // If the indices are the same, return the inserted element else + // if the indices are known different, extract the element from // the original vector. 
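The reworked SREM case in the SelectionDAG.cpp hunk above is easier to check with a concrete width (a worked instance, not from the patch). For N = (srem x, 8) the result satisfies result == x (mod 8) and takes the sign of x, which is exactly what the three steps compute:

    // BitWidth = 32, RA = abs(8) is a power of two, LowBits = 7
    // 1. Low bits pass through unchanged:
    //      KnownZero = KnownZero2 & 7;  KnownOne = KnownOne2 & 7;
    // 2. x known non-negative, or (x & 7) known zero:
    //      result is in [0, 7]   -> KnownZero |= ~7 (upper bits zero)
    // 3. x known negative and some bit of (x & 7) known one:
    //      result is in [-7, -1] -> KnownOne |= ~7 (upper bits one)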
- else if (isa<ConstantSDNode>(N1.getOperand(2)) && - isa<ConstantSDNode>(N2)) + if (N1.getOperand(2) == N2) { + if (VT == N1.getOperand(1).getValueType()) + return N1.getOperand(1); + else + return getSExtOrTrunc(N1.getOperand(1), DL, VT); + } else if (isa<ConstantSDNode>(N1.getOperand(2)) && + isa<ConstantSDNode>(N2)) return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N2); } break; @@ -3287,7 +3300,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, Value = getMemsetStringVal(VT, dl, DAG, TLI, Str, SrcOff); Store = DAG.getStore(Chain, dl, Value, getMemBasePlusOffset(Dst, DstOff, DAG), - DstSV, DstSVOff + DstOff, false, DstAlign); + DstSV, DstSVOff + DstOff, false, false, DstAlign); } else { // The type might not be legal for the target. This should only happen // if the type is smaller than a legal type, as on PPC, so the right @@ -3298,10 +3311,11 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, assert(NVT.bitsGE(VT)); Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain, getMemBasePlusOffset(Src, SrcOff, DAG), - SrcSV, SrcSVOff + SrcOff, VT, false, Align); + SrcSV, SrcSVOff + SrcOff, VT, false, false, Align); Store = DAG.getTruncStore(Chain, dl, Value, - getMemBasePlusOffset(Dst, DstOff, DAG), - DstSV, DstSVOff + DstOff, VT, false, DstAlign); + getMemBasePlusOffset(Dst, DstOff, DAG), + DstSV, DstSVOff + DstOff, VT, false, false, + DstAlign); } OutChains.push_back(Store); SrcOff += VTSize; @@ -3346,7 +3360,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, Value = DAG.getLoad(VT, dl, Chain, getMemBasePlusOffset(Src, SrcOff, DAG), - SrcSV, SrcSVOff + SrcOff, false, Align); + SrcSV, SrcSVOff + SrcOff, false, false, Align); LoadValues.push_back(Value); LoadChains.push_back(Value.getValue(1)); SrcOff += VTSize; @@ -3361,7 +3375,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, Store = DAG.getStore(Chain, dl, LoadValues[i], getMemBasePlusOffset(Dst, DstOff, DAG), - DstSV, DstSVOff + DstOff, false, DstAlign); + DstSV, DstSVOff + DstOff, false, false, DstAlign); OutChains.push_back(Store); DstOff += VTSize; } @@ -3396,7 +3410,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, SDValue Value = getMemsetValue(Src, VT, DAG, dl); SDValue Store = DAG.getStore(Chain, dl, Value, getMemBasePlusOffset(Dst, DstOff, DAG), - DstSV, DstSVOff + DstOff); + DstSV, DstSVOff + DstOff, false, false, 0); OutChains.push_back(Store); DstOff += VTSize; } @@ -3776,7 +3790,8 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl, ISD::LoadExtType ExtType, EVT VT, SDValue Chain, SDValue Ptr, SDValue Offset, const Value *SV, int SVOffset, EVT MemVT, - bool isVolatile, unsigned Alignment) { + bool isVolatile, bool isNonTemporal, + unsigned Alignment) { if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(VT); @@ -3790,6 +3805,8 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl, unsigned Flags = MachineMemOperand::MOLoad; if (isVolatile) Flags |= MachineMemOperand::MOVolatile; + if (isNonTemporal) + Flags |= MachineMemOperand::MONonTemporal; MachineMemOperand *MMO = MF.getMachineMemOperand(SV, Flags, SVOffset, MemVT.getStoreSize(), Alignment); @@ -3844,20 +3861,22 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl, SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, const Value *SV, int SVOffset, - bool isVolatile, unsigned Alignment) { + bool isVolatile, bool isNonTemporal, + unsigned Alignment) { 
SDValue Undef = getUNDEF(Ptr.getValueType()); return getLoad(ISD::UNINDEXED, dl, ISD::NON_EXTLOAD, VT, Chain, Ptr, Undef, - SV, SVOffset, VT, isVolatile, Alignment); + SV, SVOffset, VT, isVolatile, isNonTemporal, Alignment); } SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT, SDValue Chain, SDValue Ptr, const Value *SV, int SVOffset, EVT MemVT, - bool isVolatile, unsigned Alignment) { + bool isVolatile, bool isNonTemporal, + unsigned Alignment) { SDValue Undef = getUNDEF(Ptr.getValueType()); return getLoad(ISD::UNINDEXED, dl, ExtType, VT, Chain, Ptr, Undef, - SV, SVOffset, MemVT, isVolatile, Alignment); + SV, SVOffset, MemVT, isVolatile, isNonTemporal, Alignment); } SDValue @@ -3869,12 +3888,13 @@ SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base, return getLoad(AM, dl, LD->getExtensionType(), OrigLoad.getValueType(), LD->getChain(), Base, Offset, LD->getSrcValue(), LD->getSrcValueOffset(), LD->getMemoryVT(), - LD->isVolatile(), LD->getAlignment()); + LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); } SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue Ptr, const Value *SV, int SVOffset, - bool isVolatile, unsigned Alignment) { + bool isVolatile, bool isNonTemporal, + unsigned Alignment) { if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(Val.getValueType()); @@ -3888,6 +3908,8 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, unsigned Flags = MachineMemOperand::MOStore; if (isVolatile) Flags |= MachineMemOperand::MOVolatile; + if (isNonTemporal) + Flags |= MachineMemOperand::MONonTemporal; MachineMemOperand *MMO = MF.getMachineMemOperand(SV, Flags, SVOffset, Val.getValueType().getStoreSize(), Alignment); @@ -3920,7 +3942,8 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue Ptr, const Value *SV, int SVOffset, EVT SVT, - bool isVolatile, unsigned Alignment) { + bool isVolatile, bool isNonTemporal, + unsigned Alignment) { if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(SVT); @@ -3934,6 +3957,8 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, unsigned Flags = MachineMemOperand::MOStore; if (isVolatile) Flags |= MachineMemOperand::MOVolatile; + if (isNonTemporal) + Flags |= MachineMemOperand::MONonTemporal; MachineMemOperand *MMO = MF.getMachineMemOperand(SV, Flags, SVOffset, SVT.getStoreSize(), Alignment); @@ -4860,23 +4885,23 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs, } /// getTargetExtractSubreg - A convenience function for creating -/// TargetInstrInfo::EXTRACT_SUBREG nodes. +/// TargetOpcode::EXTRACT_SUBREG nodes. SDValue SelectionDAG::getTargetExtractSubreg(int SRIdx, DebugLoc DL, EVT VT, SDValue Operand) { SDValue SRIdxVal = getTargetConstant(SRIdx, MVT::i32); - SDNode *Subreg = getMachineNode(TargetInstrInfo::EXTRACT_SUBREG, DL, + SDNode *Subreg = getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, Operand, SRIdxVal); return SDValue(Subreg, 0); } /// getTargetInsertSubreg - A convenience function for creating -/// TargetInstrInfo::INSERT_SUBREG nodes. +/// TargetOpcode::INSERT_SUBREG nodes. 
SDValue SelectionDAG::getTargetInsertSubreg(int SRIdx, DebugLoc DL, EVT VT, SDValue Operand, SDValue Subreg) { SDValue SRIdxVal = getTargetConstant(SRIdx, MVT::i32); - SDNode *Result = getMachineNode(TargetInstrInfo::INSERT_SUBREG, DL, + SDNode *Result = getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT, Operand, Subreg, SRIdxVal); return SDValue(Result, 0); } @@ -5212,11 +5237,12 @@ unsigned SelectionDAG::AssignTopologicalOrder() { } } if (I == SortedPos) { - allnodes_iterator J = I; - SDNode *S = ++J; - dbgs() << "Offending node:\n"; +#ifndef NDEBUG + SDNode *S = ++I; + dbgs() << "Overran sorted position:\n"; S->dumprFull(); - assert(0 && "Overran sorted position"); +#endif + llvm_unreachable(0); } } @@ -5237,7 +5263,7 @@ unsigned SelectionDAG::AssignTopologicalOrder() { } /// AssignOrdering - Assign an order to the SDNode. -void SelectionDAG::AssignOrdering(SDNode *SD, unsigned Order) { +void SelectionDAG::AssignOrdering(const SDNode *SD, unsigned Order) { assert(SD && "Trying to assign an order to a null node!"); Ordering->add(SD, Order); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 23c7059..85ecb95 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -131,6 +131,17 @@ namespace { } } + /// areValueTypesLegal - Return true if types of all the values are legal. + bool areValueTypesLegal() { + for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { + EVT RegisterVT = RegVTs[Value]; + if (!TLI->isTypeLegal(RegisterVT)) + return false; + } + return true; + } + + /// append - Add the specified values to this one. void append(const RegsForValue &RHS) { TLI = RHS.TLI; @@ -176,7 +187,6 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, assert(NumParts > 0 && "No parts to assemble!"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Val = Parts[0]; - DAG.AssignOrdering(Val.getNode(), Order); if (NumParts > 1) { // Assemble the value from multiple parts. @@ -209,10 +219,6 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, Val = DAG.getNode(ISD::BUILD_PAIR, dl, RoundVT, Lo, Hi); - DAG.AssignOrdering(Lo.getNode(), Order); - DAG.AssignOrdering(Hi.getNode(), Order); - DAG.AssignOrdering(Val.getNode(), Order); - if (RoundParts < NumParts) { // Assemble the trailing non-power-of-2 part. unsigned OddParts = NumParts - RoundParts; @@ -226,15 +232,11 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, std::swap(Lo, Hi); EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Hi = DAG.getNode(ISD::ANY_EXTEND, dl, TotalVT, Hi); - DAG.AssignOrdering(Hi.getNode(), Order); Hi = DAG.getNode(ISD::SHL, dl, TotalVT, Hi, DAG.getConstant(Lo.getValueType().getSizeInBits(), TLI.getPointerTy())); - DAG.AssignOrdering(Hi.getNode(), Order); Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, TotalVT, Lo); - DAG.AssignOrdering(Lo.getNode(), Order); Val = DAG.getNode(ISD::OR, dl, TotalVT, Lo, Hi); - DAG.AssignOrdering(Val.getNode(), Order); } } else if (ValueVT.isVector()) { // Handle a multi-element vector. @@ -275,7 +277,6 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, Val = DAG.getNode(IntermediateVT.isVector() ? 
ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, dl, ValueVT, &Ops[0], NumIntermediates); - DAG.AssignOrdering(Val.getNode(), Order); } else if (PartVT.isFloatingPoint()) { // FP split into multiple FP parts (for ppcf128) assert(ValueVT == EVT(MVT::ppcf128) && PartVT == EVT(MVT::f64) && @@ -286,10 +287,6 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, if (TLI.isBigEndian()) std::swap(Lo, Hi); Val = DAG.getNode(ISD::BUILD_PAIR, dl, ValueVT, Lo, Hi); - - DAG.AssignOrdering(Hi.getNode(), Order); - DAG.AssignOrdering(Lo.getNode(), Order); - DAG.AssignOrdering(Val.getNode(), Order); } else { // FP split into integer parts (soft fp) assert(ValueVT.isFloatingPoint() && PartVT.isInteger() && @@ -307,18 +304,14 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, if (PartVT.isVector()) { assert(ValueVT.isVector() && "Unknown vector conversion!"); - SDValue Res = DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val); - DAG.AssignOrdering(Res.getNode(), Order); - return Res; + return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val); } if (ValueVT.isVector()) { assert(ValueVT.getVectorElementType() == PartVT && ValueVT.getVectorNumElements() == 1 && "Only trivial scalar-to-vector conversions should get here!"); - SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, ValueVT, Val); - DAG.AssignOrdering(Res.getNode(), Order); - return Res; + return DAG.getNode(ISD::BUILD_VECTOR, dl, ValueVT, Val); } if (PartVT.isInteger() && @@ -330,36 +323,24 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, if (AssertOp != ISD::DELETED_NODE) Val = DAG.getNode(AssertOp, dl, PartVT, Val, DAG.getValueType(ValueVT)); - DAG.AssignOrdering(Val.getNode(), Order); - Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val); - DAG.AssignOrdering(Val.getNode(), Order); - return Val; + return DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val); } else { - Val = DAG.getNode(ISD::ANY_EXTEND, dl, ValueVT, Val); - DAG.AssignOrdering(Val.getNode(), Order); - return Val; + return DAG.getNode(ISD::ANY_EXTEND, dl, ValueVT, Val); } } if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { if (ValueVT.bitsLT(Val.getValueType())) { // FP_ROUND's are always exact here. - Val = DAG.getNode(ISD::FP_ROUND, dl, ValueVT, Val, - DAG.getIntPtrConstant(1)); - DAG.AssignOrdering(Val.getNode(), Order); - return Val; + return DAG.getNode(ISD::FP_ROUND, dl, ValueVT, Val, + DAG.getIntPtrConstant(1)); } - Val = DAG.getNode(ISD::FP_EXTEND, dl, ValueVT, Val); - DAG.AssignOrdering(Val.getNode(), Order); - return Val; + return DAG.getNode(ISD::FP_EXTEND, dl, ValueVT, Val); } - if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) { - Val = DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val); - DAG.AssignOrdering(Val.getNode(), Order); - return Val; - } + if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) + return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val); llvm_unreachable("Unknown mismatch!"); return SDValue(); @@ -414,8 +395,6 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, } } - DAG.AssignOrdering(Val.getNode(), Order); - // The value may have changed - recompute ValueVT. 
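The getCopyFromParts hunks above delete the interleaved ordering calls but leave the reassembly arithmetic untouched: the low part is zero-extended, the high part any-extended and shifted left by the low part's width, and the two are OR'd together. The same computation on concrete scalar types, as a plain C++ sketch:

    #include <cstdint>
    #include <cstdio>

    // Reassemble a 64-bit value from two 32-bit parts the way
    // getCopyFromParts combines Lo and Hi for an expanded integer.
    uint64_t assembleFromParts(uint32_t Lo, uint32_t Hi) {
      uint64_t WideLo = static_cast<uint64_t>(Lo);        // ZERO_EXTEND
      uint64_t WideHi = static_cast<uint64_t>(Hi) << 32;  // ANY_EXTEND + SHL
      return WideLo | WideHi;                             // OR
    }

    int main() {
      std::printf("%llx\n", (unsigned long long)
                  assembleFromParts(0xdeadbeefu, 0x1234u));  // 1234deadbeef
      return 0;
    }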
ValueVT = Val.getValueType(); assert(NumParts * PartBits == ValueVT.getSizeInBits() && @@ -448,9 +427,6 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, NumParts = RoundParts; ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val); - - DAG.AssignOrdering(OddVal.getNode(), Order); - DAG.AssignOrdering(Val.getNode(), Order); } // The number of parts is a power of 2. Repeatedly bisect the value using @@ -460,8 +436,6 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, ValueVT.getSizeInBits()), Val); - DAG.AssignOrdering(Parts[0].getNode(), Order); - for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) { for (unsigned i = 0; i < NumParts; i += StepSize) { unsigned ThisBits = StepSize * PartBits / 2; @@ -476,16 +450,11 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, ThisVT, Part0, DAG.getConstant(0, PtrVT)); - DAG.AssignOrdering(Part0.getNode(), Order); - DAG.AssignOrdering(Part1.getNode(), Order); - if (ThisBits == PartBits && ThisVT != PartVT) { Part0 = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Part0); Part1 = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Part1); - DAG.AssignOrdering(Part0.getNode(), Order); - DAG.AssignOrdering(Part1.getNode(), Order); } } } @@ -511,7 +480,6 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, } } - DAG.AssignOrdering(Val.getNode(), Order); Parts[0] = Val; return; } @@ -539,8 +507,6 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, IntermediateVT, Val, DAG.getConstant(i, PtrVT)); - - DAG.AssignOrdering(Ops[i].getNode(), Order); } // Split the intermediate operands into legal parts. @@ -638,23 +604,34 @@ SDValue SelectionDAGBuilder::getControlRoot() { return Root; } +void SelectionDAGBuilder::AssignOrderingToNode(const SDNode *Node) { + if (DAG.GetOrdering(Node) != 0) return; // Already has ordering. + DAG.AssignOrdering(Node, SDNodeOrder); + + for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I) + AssignOrderingToNode(Node->getOperand(I).getNode()); +} + void SelectionDAGBuilder::visit(Instruction &I) { visit(I.getOpcode(), I); } void SelectionDAGBuilder::visit(unsigned Opcode, User &I) { - // We're processing a new instruction. - ++SDNodeOrder; - // Note: this doesn't use InstVisitor, because it has to work with // ConstantExpr's in addition to instructions. switch (Opcode) { default: llvm_unreachable("Unknown instruction type encountered!"); // Build the switch statement using the Instruction.def file. #define HANDLE_INST(NUM, OPCODE, CLASS) \ - case Instruction::OPCODE: return visit##OPCODE((CLASS&)I); + case Instruction::OPCODE: visit##OPCODE((CLASS&)I); break; #include "llvm/Instruction.def" } + + // Assign the ordering to the freshly created DAG nodes. 
+ if (NodeMap.count(&I)) { + ++SDNodeOrder; + AssignOrderingToNode(getValue(&I).getNode()); + } } SDValue SelectionDAGBuilder::getValue(const Value *V) { @@ -699,10 +676,8 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { Constants.push_back(SDValue(Val, i)); } - SDValue Res = DAG.getMergeValues(&Constants[0], Constants.size(), - getCurDebugLoc()); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); - return Res; + return DAG.getMergeValues(&Constants[0], Constants.size(), + getCurDebugLoc()); } if (isa<StructType>(C->getType()) || isa<ArrayType>(C->getType())) { @@ -725,10 +700,8 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { Constants[i] = DAG.getConstant(0, EltVT); } - SDValue Res = DAG.getMergeValues(&Constants[0], NumElts, - getCurDebugLoc()); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); - return Res; + return DAG.getMergeValues(&Constants[0], NumElts, + getCurDebugLoc()); } if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) @@ -756,10 +729,8 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { } // Create a BUILD_VECTOR node. - SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), - VT, &Ops[0], Ops.size()); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); - return NodeMap[V] = Res; + return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), + VT, &Ops[0], Ops.size()); } // If this is a static alloca, generate it as the frameindex instead of @@ -873,16 +844,11 @@ void SelectionDAGBuilder::visitRet(ReturnInst &I) { Chains[i] = DAG.getStore(Chain, getCurDebugLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + i), - Add, NULL, Offsets[i], false, 0); - - DAG.AssignOrdering(Add.getNode(), SDNodeOrder); - DAG.AssignOrdering(Chains[i].getNode(), SDNodeOrder); + Add, NULL, Offsets[i], false, false, 0); } Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, &Chains[0], NumValues); - - DAG.AssignOrdering(Chain.getNode(), SDNodeOrder); } else { for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { SmallVector<EVT, 4> ValueVTs; @@ -948,7 +914,6 @@ void SelectionDAGBuilder::visitRet(ReturnInst &I) { // Update the DAG with the new chain value resulting from return lowering. DAG.setRoot(Chain); - DAG.AssignOrdering(Chain.getNode(), SDNodeOrder); } /// CopyToExportRegsIfNeeded - If the given value has virtual registers @@ -1209,13 +1174,10 @@ void SelectionDAGBuilder::visitBr(BranchInst &I) { CurMBB->addSuccessor(Succ0MBB); // If this is not a fall-through branch, emit the branch. 
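This hunk is the heart of the refactoring that removes the scattered DAG.AssignOrdering calls throughout the rest of the file: visit() now bumps SDNodeOrder once per instruction, and AssignOrderingToNode recursively stamps every not-yet-ordered node reachable from the instruction's result, stopping where an order already exists. A self-contained model of the traversal, with a plain struct standing in for SDNode:

    #include <cstdio>
    #include <unordered_map>
    #include <vector>

    struct Node {                       // stand-in for SDNode
      std::vector<Node*> operands;
    };

    std::unordered_map<const Node*, unsigned> Ordering;  // 0 = unassigned

    // Mirrors SelectionDAGBuilder::AssignOrderingToNode: stamp this node,
    // then recurse into operands, stopping at already-ordered nodes
    // (they were built for an earlier instruction).
    void assignOrderingToNode(const Node* N, unsigned Order) {
      if (Ordering[N] != 0) return;
      Ordering[N] = Order;
      for (const Node* Op : N->operands)
        assignOrderingToNode(Op, Order);
    }

    int main() {
      Node A, B{{&A}}, C{{&A, &B}};
      assignOrderingToNode(&B, 7);      // instruction 7 built A and B
      assignOrderingToNode(&C, 8);      // instruction 8 built only C
      std::printf("A=%u B=%u C=%u\n", Ordering[&A], Ordering[&B], Ordering[&C]);
      // Prints A=7 B=7 C=8: nodes keep the order of the instruction
      // that first created them.
      return 0;
    }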
- if (Succ0MBB != NextBlock) { - SDValue V = DAG.getNode(ISD::BR, getCurDebugLoc(), + if (Succ0MBB != NextBlock) + DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, getControlRoot(), - DAG.getBasicBlock(Succ0MBB)); - DAG.setRoot(V); - DAG.AssignOrdering(V.getNode(), SDNodeOrder); - } + DAG.getBasicBlock(Succ0MBB))); return; } @@ -1321,8 +1283,6 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB) { } } - DAG.AssignOrdering(Cond.getNode(), SDNodeOrder); - // Update successor info CurMBB->addSuccessor(CB.TrueBB); CurMBB->addSuccessor(CB.FalseBB); @@ -1340,13 +1300,11 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB) { std::swap(CB.TrueBB, CB.FalseBB); SDValue True = DAG.getConstant(1, Cond.getValueType()); Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True); - DAG.AssignOrdering(Cond.getNode(), SDNodeOrder); } SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, MVT::Other, getControlRoot(), Cond, DAG.getBasicBlock(CB.TrueBB)); - DAG.AssignOrdering(BrCond.getNode(), SDNodeOrder); // If the branch was constant folded, fix up the CFG. if (BrCond.getOpcode() == ISD::BR) { @@ -1356,12 +1314,9 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB) { if (BrCond == getControlRoot()) CurMBB->removeSuccessor(CB.TrueBB); - if (CB.FalseBB != NextBlock) { + if (CB.FalseBB != NextBlock) BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, DAG.getBasicBlock(CB.FalseBB)); - - DAG.AssignOrdering(BrCond.getNode(), SDNodeOrder); - } } DAG.setRoot(BrCond); @@ -1379,10 +1334,6 @@ void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) { MVT::Other, Index.getValue(1), Table, Index); DAG.setRoot(BrJumpTable); - - DAG.AssignOrdering(Index.getNode(), SDNodeOrder); - DAG.AssignOrdering(Table.getNode(), SDNodeOrder); - DAG.AssignOrdering(BrJumpTable.getNode(), SDNodeOrder); } /// visitJumpTableHeader - This function emits necessary code to produce index @@ -1398,7 +1349,7 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, DAG.getConstant(JTH.First, VT)); // The SDNode we just created, which holds the value being switched on minus - // the the smallest case value, needs to be copied to a virtual register so it + // the smallest case value, needs to be copied to a virtual register so it // can be used as an index into the jump table in a subsequent basic block. // This value may be smaller or larger than the target's pointer type, and // therefore require extension or truncating. @@ -1417,11 +1368,6 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, DAG.getConstant(JTH.Last-JTH.First,VT), ISD::SETUGT); - DAG.AssignOrdering(Sub.getNode(), SDNodeOrder); - DAG.AssignOrdering(SwitchOp.getNode(), SDNodeOrder); - DAG.AssignOrdering(CopyTo.getNode(), SDNodeOrder); - DAG.AssignOrdering(CMP.getNode(), SDNodeOrder); - // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. 
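Two lowering details recur in the visitBr and visitSwitchCase hunks above: an unconditional branch whose target is the next block in layout is simply not emitted, and when the true successor is the fall-through block the successors are swapped and the condition is inverted by XOR'ing with 1. A toy model of that emission logic (block numbers are hypothetical):

    #include <cstdio>
    #include <utility>

    // If the true successor is the fall-through block, swap successors
    // and invert the condition (the DAG-level XOR with 1); then elide
    // the unconditional branch when it targets the next block anyway.
    void emitCondBranch(int trueBB, int falseBB, int nextBB) {
      bool negated = false;
      if (trueBB == nextBB) {
        std::swap(trueBB, falseBB);
        negated = true;                 // Cond = XOR(Cond, 1)
      }
      std::printf("  brcond %scond -> BB%d\n", negated ? "!" : "", trueBB);
      if (falseBB != nextBB)
        std::printf("  br -> BB%d\n", falseBB);
    }

    int main() {
      emitCondBranch(2, 3, /*nextBB=*/2);
      // Prints only "brcond !cond -> BB3"; falling through reaches BB2.
      return 0;
    }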
MachineBasicBlock *NextBlock = 0; @@ -1434,13 +1380,9 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, MVT::Other, CopyTo, CMP, DAG.getBasicBlock(JT.Default)); - DAG.AssignOrdering(BrCond.getNode(), SDNodeOrder); - - if (JT.MBB != NextBlock) { + if (JT.MBB != NextBlock) BrCond = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrCond, DAG.getBasicBlock(JT.MBB)); - DAG.AssignOrdering(BrCond.getNode(), SDNodeOrder); - } DAG.setRoot(BrCond); } @@ -1467,11 +1409,6 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B) { SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(), B.Reg, ShiftOp); - DAG.AssignOrdering(Sub.getNode(), SDNodeOrder); - DAG.AssignOrdering(RangeCmp.getNode(), SDNodeOrder); - DAG.AssignOrdering(ShiftOp.getNode(), SDNodeOrder); - DAG.AssignOrdering(CopyTo.getNode(), SDNodeOrder); - // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. MachineBasicBlock *NextBlock = 0; @@ -1488,13 +1425,9 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B) { MVT::Other, CopyTo, RangeCmp, DAG.getBasicBlock(B.Default)); - DAG.AssignOrdering(BrRange.getNode(), SDNodeOrder); - - if (MBB != NextBlock) { + if (MBB != NextBlock) BrRange = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, CopyTo, DAG.getBasicBlock(MBB)); - DAG.AssignOrdering(BrRange.getNode(), SDNodeOrder); - } DAG.setRoot(BrRange); } @@ -1520,11 +1453,6 @@ void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB, AndOp, DAG.getConstant(0, TLI.getPointerTy()), ISD::SETNE); - DAG.AssignOrdering(ShiftOp.getNode(), SDNodeOrder); - DAG.AssignOrdering(SwitchVal.getNode(), SDNodeOrder); - DAG.AssignOrdering(AndOp.getNode(), SDNodeOrder); - DAG.AssignOrdering(AndCmp.getNode(), SDNodeOrder); - CurMBB->addSuccessor(B.TargetBB); CurMBB->addSuccessor(NextMBB); @@ -1532,8 +1460,6 @@ void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB, MVT::Other, getControlRoot(), AndCmp, DAG.getBasicBlock(B.TargetBB)); - DAG.AssignOrdering(BrAnd.getNode(), SDNodeOrder); - // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. MachineBasicBlock *NextBlock = 0; @@ -1541,11 +1467,9 @@ void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB, if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; - if (NextMBB != NextBlock) { + if (NextMBB != NextBlock) BrAnd = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrAnd, DAG.getBasicBlock(NextMBB)); - DAG.AssignOrdering(BrAnd.getNode(), SDNodeOrder); - } DAG.setRoot(BrAnd); } @@ -1570,11 +1494,9 @@ void SelectionDAGBuilder::visitInvoke(InvokeInst &I) { CurMBB->addSuccessor(LandingPad); // Drop into normal successor. 
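visitBitTestHeader and visitBitTestCase above implement the bit-test expansion for clustered switch cases: bias the switch value by the smallest case (the SUB node), range-check the biased value with one unsigned comparison, then test (1 << biased) against each destination's case mask (the SHL, AND and SETNE nodes). The same idea in plain C++, with hypothetical case values 10, 12 and 15:

    #include <cstdint>
    #include <cstdio>

    // One shift, one AND, one test handle `case 10: case 12: case 15:`.
    const char* classify(uint32_t x) {
      const uint32_t First = 10, Last = 15;
      if (x - First > Last - First)        // unsigned wrap makes this a
        return "default";                  // combined lower+upper bound check
      uint32_t Bit = 1u << (x - First);    // the SHL built in the header
      const uint32_t Mask = (1u << 0) | (1u << 2) | (1u << 5);  // 10, 12, 15
      return (Bit & Mask) ? "matched" : "default";  // AND + SETNE
    }

    int main() {
      std::printf("%s %s %s\n", classify(12), classify(11), classify(99));
      // matched default default
      return 0;
    }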
- SDValue Branch = DAG.getNode(ISD::BR, getCurDebugLoc(), - MVT::Other, getControlRoot(), - DAG.getBasicBlock(Return)); - DAG.setRoot(Branch); - DAG.AssignOrdering(Branch.getNode(), SDNodeOrder); + DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), + MVT::Other, getControlRoot(), + DAG.getBasicBlock(Return))); } void SelectionDAGBuilder::visitUnwind(UnwindInst &I) { @@ -1733,8 +1655,8 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR, std::vector<MachineBasicBlock*> DestBBs; APInt TEI = First; for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) { - const APInt& Low = cast<ConstantInt>(I->Low)->getValue(); - const APInt& High = cast<ConstantInt>(I->High)->getValue(); + const APInt &Low = cast<ConstantInt>(I->Low)->getValue(); + const APInt &High = cast<ConstantInt>(I->High)->getValue(); if (Low.sle(TEI) && TEI.sle(High)) { DestBBs.push_back(I->BB); @@ -1757,7 +1679,9 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR, // Create a jump table index for this jump table, or return an existing // one. - unsigned JTI = CurMF->getJumpTableInfo()->getJumpTableIndex(DestBBs); + unsigned JTEncoding = TLI.getJumpTableEncoding(); + unsigned JTI = CurMF->getOrCreateJumpTableInfo(JTEncoding) + ->getJumpTableIndex(DestBBs); // Set the jump table information so that we can codegen it as a second // MachineBasicBlock @@ -2086,13 +2010,10 @@ void SelectionDAGBuilder::visitSwitch(SwitchInst &SI) { // If this is not a fall-through branch, emit the branch. CurMBB->addSuccessor(Default); - if (Default != NextBlock) { - SDValue Res = DAG.getNode(ISD::BR, getCurDebugLoc(), - MVT::Other, getControlRoot(), - DAG.getBasicBlock(Default)); - DAG.setRoot(Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); - } + if (Default != NextBlock) + DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), + MVT::Other, getControlRoot(), + DAG.getBasicBlock(Default))); return; } @@ -2141,15 +2062,19 @@ void SelectionDAGBuilder::visitSwitch(SwitchInst &SI) { } void SelectionDAGBuilder::visitIndirectBr(IndirectBrInst &I) { - // Update machine-CFG edges. + // Update machine-CFG edges with unique successors. 
+ SmallVector<BasicBlock*, 32> succs; + succs.reserve(I.getNumSuccessors()); for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) - CurMBB->addSuccessor(FuncInfo.MBBMap[I.getSuccessor(i)]); - - SDValue Res = DAG.getNode(ISD::BRIND, getCurDebugLoc(), - MVT::Other, getControlRoot(), - getValue(I.getAddress())); - DAG.setRoot(Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + succs.push_back(I.getSuccessor(i)); + array_pod_sort(succs.begin(), succs.end()); + succs.erase(std::unique(succs.begin(), succs.end()), succs.end()); + for (unsigned i = 0, e = succs.size(); i != e; ++i) + CurMBB->addSuccessor(FuncInfo.MBBMap[succs[i]]); + + DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(), + MVT::Other, getControlRoot(), + getValue(I.getAddress()))); } void SelectionDAGBuilder::visitFSub(User &I) { @@ -2164,10 +2089,8 @@ void SelectionDAGBuilder::visitFSub(User &I) { Constant *CNZ = ConstantVector::get(&NZ[0], NZ.size()); if (CV == CNZ) { SDValue Op2 = getValue(I.getOperand(1)); - SDValue Res = DAG.getNode(ISD::FNEG, getCurDebugLoc(), - Op2.getValueType(), Op2); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(), + Op2.getValueType(), Op2)); return; } } @@ -2176,10 +2099,8 @@ void SelectionDAGBuilder::visitFSub(User &I) { if (ConstantFP *CFP = dyn_cast<ConstantFP>(I.getOperand(0))) if (CFP->isExactlyValue(ConstantFP::getNegativeZero(Ty)->getValueAPF())) { SDValue Op2 = getValue(I.getOperand(1)); - SDValue Res = DAG.getNode(ISD::FNEG, getCurDebugLoc(), - Op2.getValueType(), Op2); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(), + Op2.getValueType(), Op2)); return; } @@ -2189,10 +2110,8 @@ void SelectionDAGBuilder::visitFSub(User &I) { void SelectionDAGBuilder::visitBinary(User &I, unsigned OpCode) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); - SDValue Res = DAG.getNode(OpCode, getCurDebugLoc(), - Op1.getValueType(), Op1, Op2); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(OpCode, getCurDebugLoc(), + Op1.getValueType(), Op1, Op2)); } void SelectionDAGBuilder::visitShift(User &I, unsigned Opcode) { @@ -2225,12 +2144,8 @@ void SelectionDAGBuilder::visitShift(User &I, unsigned Opcode) { TLI.getPointerTy(), Op2); } - SDValue Res = DAG.getNode(Opcode, getCurDebugLoc(), - Op1.getValueType(), Op1, Op2); - setValue(&I, Res); - DAG.AssignOrdering(Op1.getNode(), SDNodeOrder); - DAG.AssignOrdering(Op2.getNode(), SDNodeOrder); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(), + Op1.getValueType(), Op1, Op2)); } void SelectionDAGBuilder::visitICmp(User &I) { @@ -2244,9 +2159,7 @@ void SelectionDAGBuilder::visitICmp(User &I) { ISD::CondCode Opcode = getICmpCondCode(predicate); EVT DestVT = TLI.getValueType(I.getType()); - SDValue Res = DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Opcode); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Opcode)); } void SelectionDAGBuilder::visitFCmp(User &I) { @@ -2259,9 +2172,7 @@ void SelectionDAGBuilder::visitFCmp(User &I) { SDValue Op2 = getValue(I.getOperand(1)); ISD::CondCode Condition = getFCmpCondCode(predicate); EVT DestVT = TLI.getValueType(I.getType()); - SDValue Res = DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition); - setValue(&I, Res); - 
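The visitIndirectBr rewrite above fixes duplicate CFG edges: an indirectbr may list the same destination block several times, so the successors are sorted and uniqued before addSuccessor is called. The standard-library equivalent of that array_pod_sort idiom:

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    int main() {
      // Successor list of a hypothetical indirectbr; block 2 appears twice.
      std::vector<int> succs = {4, 2, 7, 2};

      // The same sort + unique + erase sequence the patch performs.
      std::sort(succs.begin(), succs.end());
      succs.erase(std::unique(succs.begin(), succs.end()), succs.end());

      for (int bb : succs)
        std::printf("addSuccessor(BB%d)\n", bb);  // each edge added once
      return 0;
    }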
DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition)); } void SelectionDAGBuilder::visitSelect(User &I) { @@ -2275,7 +2186,7 @@ void SelectionDAGBuilder::visitSelect(User &I) { SDValue TrueVal = getValue(I.getOperand(1)); SDValue FalseVal = getValue(I.getOperand(2)); - for (unsigned i = 0; i != NumValues; ++i) { + for (unsigned i = 0; i != NumValues; ++i) Values[i] = DAG.getNode(ISD::SELECT, getCurDebugLoc(), TrueVal.getNode()->getValueType(i), Cond, SDValue(TrueVal.getNode(), @@ -2283,23 +2194,16 @@ void SelectionDAGBuilder::visitSelect(User &I) { SDValue(FalseVal.getNode(), FalseVal.getResNo() + i)); - DAG.AssignOrdering(Values[i].getNode(), SDNodeOrder); - } - - SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), - DAG.getVTList(&ValueVTs[0], NumValues), - &Values[0], NumValues); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + DAG.getVTList(&ValueVTs[0], NumValues), + &Values[0], NumValues)); } void SelectionDAGBuilder::visitTrunc(User &I) { // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest). SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); - SDValue Res = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N)); } void SelectionDAGBuilder::visitZExt(User &I) { @@ -2307,9 +2211,7 @@ void SelectionDAGBuilder::visitZExt(User &I) { // ZExt also can't be a cast to bool for same reason. So, nothing much to do SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); - SDValue Res = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N)); } void SelectionDAGBuilder::visitSExt(User &I) { @@ -2317,64 +2219,50 @@ void SelectionDAGBuilder::visitSExt(User &I) { // SExt also can't be a cast to bool for same reason. 
So, nothing much to do SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); - SDValue Res = DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), DestVT, N); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), DestVT, N)); } void SelectionDAGBuilder::visitFPTrunc(User &I) { // FPTrunc is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); - SDValue Res = DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(), - DestVT, N, DAG.getIntPtrConstant(0)); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(), + DestVT, N, DAG.getIntPtrConstant(0))); } void SelectionDAGBuilder::visitFPExt(User &I){ // FPTrunc is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); - SDValue Res = DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N)); } void SelectionDAGBuilder::visitFPToUI(User &I) { // FPToUI is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); - SDValue Res = DAG.getNode(ISD::FP_TO_UINT, getCurDebugLoc(), DestVT, N); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurDebugLoc(), DestVT, N)); } void SelectionDAGBuilder::visitFPToSI(User &I) { // FPToSI is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); - SDValue Res = DAG.getNode(ISD::FP_TO_SINT, getCurDebugLoc(), DestVT, N); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurDebugLoc(), DestVT, N)); } void SelectionDAGBuilder::visitUIToFP(User &I) { // UIToFP is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); - SDValue Res = DAG.getNode(ISD::UINT_TO_FP, getCurDebugLoc(), DestVT, N); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurDebugLoc(), DestVT, N)); } void SelectionDAGBuilder::visitSIToFP(User &I){ // SIToFP is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); - SDValue Res = DAG.getNode(ISD::SINT_TO_FP, getCurDebugLoc(), DestVT, N); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurDebugLoc(), DestVT, N)); } void SelectionDAGBuilder::visitPtrToInt(User &I) { @@ -2383,9 +2271,7 @@ void SelectionDAGBuilder::visitPtrToInt(User &I) { SDValue N = getValue(I.getOperand(0)); EVT SrcVT = N.getValueType(); EVT DestVT = TLI.getValueType(I.getType()); - SDValue Res = DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT)); } void SelectionDAGBuilder::visitIntToPtr(User &I) { @@ -2394,9 +2280,7 @@ void SelectionDAGBuilder::visitIntToPtr(User &I) { SDValue N = getValue(I.getOperand(0)); EVT SrcVT = N.getValueType(); EVT DestVT = TLI.getValueType(I.getType()); - SDValue Res = DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT); - 
setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT)); } void SelectionDAGBuilder::visitBitCast(User &I) { @@ -2405,14 +2289,11 @@ void SelectionDAGBuilder::visitBitCast(User &I) { // BitCast assures us that source and destination are the same size so this is // either a BIT_CONVERT or a no-op. - if (DestVT != N.getValueType()) { - SDValue Res = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), - DestVT, N); // convert types. - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); - } else { + if (DestVT != N.getValueType()) + setValue(&I, DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), + DestVT, N)); // convert types. + else setValue(&I, N); // noop cast. - } } void SelectionDAGBuilder::visitInsertElement(User &I) { @@ -2421,13 +2302,9 @@ void SelectionDAGBuilder::visitInsertElement(User &I) { SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), TLI.getPointerTy(), getValue(I.getOperand(2))); - SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurDebugLoc(), - TLI.getValueType(I.getType()), - InVec, InVal, InIdx); - setValue(&I, Res); - - DAG.AssignOrdering(InIdx.getNode(), SDNodeOrder); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurDebugLoc(), + TLI.getValueType(I.getType()), + InVec, InVal, InIdx)); } void SelectionDAGBuilder::visitExtractElement(User &I) { @@ -2435,15 +2312,10 @@ void SelectionDAGBuilder::visitExtractElement(User &I) { SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), TLI.getPointerTy(), getValue(I.getOperand(1))); - SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), - TLI.getValueType(I.getType()), InVec, InIdx); - setValue(&I, Res); - - DAG.AssignOrdering(InIdx.getNode(), SDNodeOrder); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), + TLI.getValueType(I.getType()), InVec, InIdx)); } - // Utility for visitShuffleVector - Returns true if the mask is mask starting // from SIndx and increasing to the element length (undefs are allowed). static bool SequentialMask(SmallVectorImpl<int> &Mask, unsigned SIndx) { @@ -2462,8 +2334,7 @@ void SelectionDAGBuilder::visitShuffleVector(User &I) { // Convert the ConstantVector mask operand into an array of ints, with -1 // representing undef values. SmallVector<Constant*, 8> MaskElts; - cast<Constant>(I.getOperand(2))->getVectorElements(*DAG.getContext(), - MaskElts); + cast<Constant>(I.getOperand(2))->getVectorElements(MaskElts); unsigned MaskNumElts = MaskElts.size(); for (unsigned i = 0; i != MaskNumElts; ++i) { if (isa<UndefValue>(MaskElts[i])) @@ -2477,10 +2348,8 @@ void SelectionDAGBuilder::visitShuffleVector(User &I) { unsigned SrcNumElts = SrcVT.getVectorNumElements(); if (SrcNumElts == MaskNumElts) { - SDValue Res = DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, - &Mask[0]); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, + &Mask[0])); return; } @@ -2491,10 +2360,8 @@ void SelectionDAGBuilder::visitShuffleVector(User &I) { // lengths match. if (SrcNumElts*2 == MaskNumElts && SequentialMask(Mask, 0)) { // The shuffle is concatenating two vectors together. 
- SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(), - VT, Src1, Src2); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(), + VT, Src1, Src2)); return; } @@ -2526,12 +2393,8 @@ void SelectionDAGBuilder::visitShuffleVector(User &I) { MappedOps.push_back(Idx + MaskNumElts - SrcNumElts); } - SDValue Res = DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, - &MappedOps[0]); - setValue(&I, Res); - DAG.AssignOrdering(Src1.getNode(), SDNodeOrder); - DAG.AssignOrdering(Src2.getNode(), SDNodeOrder); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, + &MappedOps[0])); return; } @@ -2583,9 +2446,7 @@ void SelectionDAGBuilder::visitShuffleVector(User &I) { } if (RangeUse[0] == 0 && RangeUse[1] == 0) { - SDValue Res = DAG.getUNDEF(VT); - setValue(&I, Res); // Vectors are not used. - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used. return; } else if (RangeUse[0] < 2 && RangeUse[1] < 2) { @@ -2597,8 +2458,6 @@ void SelectionDAGBuilder::visitShuffleVector(User &I) { else Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, getCurDebugLoc(), VT, Src, DAG.getIntPtrConstant(StartIdx[Input])); - - DAG.AssignOrdering(Src.getNode(), SDNodeOrder); } // Calculate new mask. @@ -2613,10 +2472,8 @@ void SelectionDAGBuilder::visitShuffleVector(User &I) { MappedOps.push_back(Idx - SrcNumElts - StartIdx[1] + MaskNumElts); } - SDValue Res = DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, - &MappedOps[0]); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, + &MappedOps[0])); return; } } @@ -2643,14 +2500,11 @@ void SelectionDAGBuilder::visitShuffleVector(User &I) { DAG.getConstant(Idx - SrcNumElts, PtrVT)); Ops.push_back(Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); } } - SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), - VT, &Ops[0], Ops.size()); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), + VT, &Ops[0], Ops.size())); } void SelectionDAGBuilder::visitInsertValue(InsertValueInst &I) { @@ -2689,11 +2543,9 @@ void SelectionDAGBuilder::visitInsertValue(InsertValueInst &I) { Values[i] = IntoUndef ? 
DAG.getUNDEF(AggValueVTs[i]) : SDValue(Agg.getNode(), Agg.getResNo() + i); - SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), - DAG.getVTList(&AggValueVTs[0], NumAggValues), - &Values[0], NumAggValues); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + DAG.getVTList(&AggValueVTs[0], NumAggValues), + &Values[0], NumAggValues)); } void SelectionDAGBuilder::visitExtractValue(ExtractValueInst &I) { @@ -2719,11 +2571,9 @@ void SelectionDAGBuilder::visitExtractValue(ExtractValueInst &I) { DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) : SDValue(Agg.getNode(), Agg.getResNo() + i); - SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), - DAG.getVTList(&ValValueVTs[0], NumValValues), - &Values[0], NumValValues); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + DAG.getVTList(&ValValueVTs[0], NumValValues), + &Values[0], NumValValues)); } void SelectionDAGBuilder::visitGetElementPtr(User &I) { @@ -2740,7 +2590,6 @@ void SelectionDAGBuilder::visitGetElementPtr(User &I) { uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field); N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N, DAG.getIntPtrConstant(Offset)); - DAG.AssignOrdering(N.getNode(), SDNodeOrder); } Ty = StTy->getElementType(Field); @@ -2764,9 +2613,6 @@ void SelectionDAGBuilder::visitGetElementPtr(User &I) { N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N, OffsVal); - - DAG.AssignOrdering(OffsVal.getNode(), SDNodeOrder); - DAG.AssignOrdering(N.getNode(), SDNodeOrder); continue; } @@ -2792,13 +2638,10 @@ void SelectionDAGBuilder::visitGetElementPtr(User &I) { IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(), N.getValueType(), IdxN, Scale); } - - DAG.AssignOrdering(IdxN.getNode(), SDNodeOrder); } N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N, IdxN); - DAG.AssignOrdering(N.getNode(), SDNodeOrder); } } @@ -2823,11 +2666,8 @@ void SelectionDAGBuilder::visitAlloca(AllocaInst &I) { AllocSize, DAG.getConstant(TySize, AllocSize.getValueType())); - DAG.AssignOrdering(AllocSize.getNode(), SDNodeOrder); - EVT IntPtr = TLI.getPointerTy(); AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurDebugLoc(), IntPtr); - DAG.AssignOrdering(AllocSize.getNode(), SDNodeOrder); // Handle alignment. If the requested alignment is less than or equal to // the stack alignment, ignore it. If the size is greater than or equal to @@ -2842,13 +2682,11 @@ void SelectionDAGBuilder::visitAlloca(AllocaInst &I) { AllocSize = DAG.getNode(ISD::ADD, getCurDebugLoc(), AllocSize.getValueType(), AllocSize, DAG.getIntPtrConstant(StackAlign-1)); - DAG.AssignOrdering(AllocSize.getNode(), SDNodeOrder); // Mask out the low bits for alignment purposes. AllocSize = DAG.getNode(ISD::AND, getCurDebugLoc(), AllocSize.getValueType(), AllocSize, DAG.getIntPtrConstant(~(uint64_t)(StackAlign-1))); - DAG.AssignOrdering(AllocSize.getNode(), SDNodeOrder); SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) }; SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other); @@ -2856,7 +2694,6 @@ void SelectionDAGBuilder::visitAlloca(AllocaInst &I) { VTs, Ops, 3); setValue(&I, DSA); DAG.setRoot(DSA.getValue(1)); - DAG.AssignOrdering(DSA.getNode(), SDNodeOrder); // Inform the Frame Information that we have just allocated a variable-sized // object. 
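The dynamic-alloca sizing in visitAlloca above uses the standard power-of-two align-up: over-allocate by StackAlign - 1 (the ADD node), then mask off the low bits (the AND node with ~(StackAlign - 1)). In isolation:

    #include <cstdint>
    #include <cstdio>

    // Round Size up to a multiple of Align (a power of two), exactly the
    // ADD/AND pair visitAlloca builds around AllocSize.
    uint64_t alignUp(uint64_t Size, uint64_t Align) {
      return (Size + Align - 1) & ~(Align - 1);
    }

    int main() {
      std::printf("%llu %llu\n",
                  (unsigned long long)alignUp(37, 16),   // 48
                  (unsigned long long)alignUp(64, 16));  // 64
      return 0;
    }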
@@ -2868,7 +2705,9 @@ void SelectionDAGBuilder::visitLoad(LoadInst &I) { SDValue Ptr = getValue(SV); const Type *Ty = I.getType(); + bool isVolatile = I.isVolatile(); + bool isNonTemporal = I.getMetadata("nontemporal") != 0; unsigned Alignment = I.getAlignment(); SmallVector<EVT, 4> ValueVTs; @@ -2900,13 +2739,11 @@ void SelectionDAGBuilder::visitLoad(LoadInst &I) { PtrVT, Ptr, DAG.getConstant(Offsets[i], PtrVT)); SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root, - A, SV, Offsets[i], isVolatile, Alignment); + A, SV, Offsets[i], isVolatile, + isNonTemporal, Alignment); Values[i] = L; Chains[i] = L.getValue(1); - - DAG.AssignOrdering(A.getNode(), SDNodeOrder); - DAG.AssignOrdering(L.getNode(), SDNodeOrder); } if (!ConstantMemory) { @@ -2916,15 +2753,11 @@ void SelectionDAGBuilder::visitLoad(LoadInst &I) { DAG.setRoot(Chain); else PendingLoads.push_back(Chain); - - DAG.AssignOrdering(Chain.getNode(), SDNodeOrder); } - SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), - DAG.getVTList(&ValueVTs[0], NumValues), - &Values[0], NumValues); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + DAG.getVTList(&ValueVTs[0], NumValues), + &Values[0], NumValues)); } void SelectionDAGBuilder::visitStore(StoreInst &I) { @@ -2948,6 +2781,7 @@ void SelectionDAGBuilder::visitStore(StoreInst &I) { SmallVector<SDValue, 4> Chains(NumValues); EVT PtrVT = Ptr.getValueType(); bool isVolatile = I.isVolatile(); + bool isNonTemporal = I.getMetadata("nontemporal") != 0; unsigned Alignment = I.getAlignment(); for (unsigned i = 0; i != NumValues; ++i) { @@ -2955,16 +2789,12 @@ void SelectionDAGBuilder::visitStore(StoreInst &I) { DAG.getConstant(Offsets[i], PtrVT)); Chains[i] = DAG.getStore(Root, getCurDebugLoc(), SDValue(Src.getNode(), Src.getResNo() + i), - Add, PtrV, Offsets[i], isVolatile, Alignment); - - DAG.AssignOrdering(Add.getNode(), SDNodeOrder); - DAG.AssignOrdering(Chains[i].getNode(), SDNodeOrder); + Add, PtrV, Offsets[i], isVolatile, + isNonTemporal, Alignment); } - SDValue Res = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), - MVT::Other, &Chains[0], NumValues); - DAG.setRoot(Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + DAG.setRoot(DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + MVT::Other, &Chains[0], NumValues)); } /// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC @@ -3035,8 +2865,6 @@ void SelectionDAGBuilder::visitTargetIntrinsic(CallInst &I, VTs, &Ops[0], Ops.size()); } - DAG.AssignOrdering(Result.getNode(), SDNodeOrder); - if (HasChain) { SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1); if (OnlyLoad) @@ -3049,7 +2877,6 @@ void SelectionDAGBuilder::visitTargetIntrinsic(CallInst &I, if (const VectorType *PTy = dyn_cast<VectorType>(I.getType())) { EVT VT = TLI.getValueType(PTy); Result = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), VT, Result); - DAG.AssignOrdering(Result.getNode(), SDNodeOrder); } setValue(&I, Result); @@ -3068,12 +2895,7 @@ GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl, unsigned Order) { DAG.getConstant(0x007fffff, MVT::i32)); SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1, DAG.getConstant(0x3f800000, MVT::i32)); - SDValue Res = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t2); - - DAG.AssignOrdering(t1.getNode(), Order); - DAG.AssignOrdering(t2.getNode(), Order); - DAG.AssignOrdering(Res.getNode(), Order); - return Res; + return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t2); } /// GetExponent - 
Get the exponent: @@ -3090,13 +2912,7 @@ GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI, DAG.getConstant(23, TLI.getPointerTy())); SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1, DAG.getConstant(127, MVT::i32)); - SDValue Res = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2); - - DAG.AssignOrdering(t0.getNode(), Order); - DAG.AssignOrdering(t1.getNode(), Order); - DAG.AssignOrdering(t2.getNode(), Order); - DAG.AssignOrdering(Res.getNode(), Order); - return Res; + return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2); } /// getF32Constant - Get 32-bit floating point constant. @@ -3120,7 +2936,6 @@ SelectionDAGBuilder::implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op) { I.getOperand(1)); setValue(&I, L); DAG.setRoot(L.getValue(1)); - DAG.AssignOrdering(L.getNode(), SDNodeOrder); return 0; } @@ -3131,10 +2946,7 @@ SelectionDAGBuilder::implVisitAluOverflow(CallInst &I, ISD::NodeType Op) { SDValue Op2 = getValue(I.getOperand(2)); SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1); - SDValue Result = DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2); - - setValue(&I, Result); - DAG.AssignOrdering(Result.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2)); return 0; } @@ -3162,15 +2974,9 @@ SelectionDAGBuilder::visitExp(CallInst &I) { SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1); - DAG.AssignOrdering(t0.getNode(), SDNodeOrder); - DAG.AssignOrdering(IntegerPartOfX.getNode(), SDNodeOrder); - DAG.AssignOrdering(t1.getNode(), SDNodeOrder); - DAG.AssignOrdering(X.getNode(), SDNodeOrder); - // IntegerPartOfX <<= 23; IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, DAG.getConstant(23, TLI.getPointerTy())); - DAG.AssignOrdering(IntegerPartOfX.getNode(), SDNodeOrder); if (LimitFloatPrecision <= 6) { // For floating-point precision of 6: @@ -3194,14 +3000,6 @@ SelectionDAGBuilder::visitExp(CallInst &I) { TwoToFracPartOfX, IntegerPartOfX); result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t6); - - DAG.AssignOrdering(t2.getNode(), SDNodeOrder); - DAG.AssignOrdering(t3.getNode(), SDNodeOrder); - DAG.AssignOrdering(t4.getNode(), SDNodeOrder); - DAG.AssignOrdering(t5.getNode(), SDNodeOrder); - DAG.AssignOrdering(t6.getNode(), SDNodeOrder); - DAG.AssignOrdering(TwoToFracPartOfX.getNode(), SDNodeOrder); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { // For floating-point precision of 12: // @@ -3228,16 +3026,6 @@ SelectionDAGBuilder::visitExp(CallInst &I) { TwoToFracPartOfX, IntegerPartOfX); result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t8); - - DAG.AssignOrdering(t2.getNode(), SDNodeOrder); - DAG.AssignOrdering(t3.getNode(), SDNodeOrder); - DAG.AssignOrdering(t4.getNode(), SDNodeOrder); - DAG.AssignOrdering(t5.getNode(), SDNodeOrder); - DAG.AssignOrdering(t6.getNode(), SDNodeOrder); - DAG.AssignOrdering(t7.getNode(), SDNodeOrder); - DAG.AssignOrdering(t8.getNode(), SDNodeOrder); - DAG.AssignOrdering(TwoToFracPartOfX.getNode(), SDNodeOrder); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 // For floating-point precision of 18: // @@ -3277,29 +3065,12 @@ SelectionDAGBuilder::visitExp(CallInst &I) { TwoToFracPartOfX, IntegerPartOfX); result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t14); - - DAG.AssignOrdering(t2.getNode(), SDNodeOrder); - DAG.AssignOrdering(t3.getNode(), SDNodeOrder); 
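GetSignificand and GetExponent above take an IEEE-754 single apart with integer operations: keep the 23 mantissa bits and OR in the biased exponent for 1.0 (0x3f800000) to obtain a value in [1, 2), or shift the exponent field down by 23 and subtract the bias of 127. These two helpers feed the log/log2/log10 expansions that follow, which compute log(x) as the exponent times log(2) plus a polynomial in the significand. The same bit-twiddling in ordinary C++, valid for positive normal inputs, with memcpy standing in for the BIT_CONVERT nodes:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Mirrors GetSignificand: mantissa bits with a forced exponent of 0,
    // i.e. a float in [1.0, 2.0).
    float getSignificand(float Op) {
      uint32_t bits;
      std::memcpy(&bits, &Op, sizeof bits);         // BIT_CONVERT f32 -> i32
      uint32_t t = (bits & 0x007fffffu) | 0x3f800000u;
      float r;
      std::memcpy(&r, &t, sizeof r);                // BIT_CONVERT i32 -> f32
      return r;
    }

    // Mirrors GetExponent: shift out the mantissa, remove the bias.
    float getExponent(float Op) {
      uint32_t bits;
      std::memcpy(&bits, &Op, sizeof bits);
      return (float)(int)((bits >> 23) - 127);      // SRL, SUB, SINT_TO_FP
    }

    int main() {
      float x = 12.0f;                              // 12 = 1.5 * 2^3
      std::printf("significand=%g exponent=%g\n",
                  getSignificand(x), getExponent(x));  // 1.5 and 3
      return 0;
    }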
- DAG.AssignOrdering(t4.getNode(), SDNodeOrder); - DAG.AssignOrdering(t5.getNode(), SDNodeOrder); - DAG.AssignOrdering(t6.getNode(), SDNodeOrder); - DAG.AssignOrdering(t7.getNode(), SDNodeOrder); - DAG.AssignOrdering(t8.getNode(), SDNodeOrder); - DAG.AssignOrdering(t9.getNode(), SDNodeOrder); - DAG.AssignOrdering(t10.getNode(), SDNodeOrder); - DAG.AssignOrdering(t11.getNode(), SDNodeOrder); - DAG.AssignOrdering(t12.getNode(), SDNodeOrder); - DAG.AssignOrdering(t13.getNode(), SDNodeOrder); - DAG.AssignOrdering(t14.getNode(), SDNodeOrder); - DAG.AssignOrdering(TwoToFracPartOfX.getNode(), SDNodeOrder); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } } else { // No special expansion. result = DAG.getNode(ISD::FEXP, dl, getValue(I.getOperand(1)).getValueType(), getValue(I.getOperand(1))); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } setValue(&I, result); @@ -3317,15 +3088,11 @@ SelectionDAGBuilder::visitLog(CallInst &I) { SDValue Op = getValue(I.getOperand(1)); SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); - DAG.AssignOrdering(Op1.getNode(), SDNodeOrder); - // Scale the exponent by log(2) [0.69314718f]. SDValue Exp = GetExponent(DAG, Op1, TLI, dl, SDNodeOrder); SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, getF32Constant(DAG, 0x3f317218)); - DAG.AssignOrdering(LogOfExponent.getNode(), SDNodeOrder); - // Get the significand and build it into a floating-point number with // exponent of 1. SDValue X = GetSignificand(DAG, Op1, dl, SDNodeOrder); @@ -3348,12 +3115,6 @@ SelectionDAGBuilder::visitLog(CallInst &I) { result = DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa); - - DAG.AssignOrdering(t0.getNode(), SDNodeOrder); - DAG.AssignOrdering(t1.getNode(), SDNodeOrder); - DAG.AssignOrdering(t2.getNode(), SDNodeOrder); - DAG.AssignOrdering(LogOfMantissa.getNode(), SDNodeOrder); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { // For floating-point precision of 12: // @@ -3380,16 +3141,6 @@ SelectionDAGBuilder::visitLog(CallInst &I) { result = DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa); - - DAG.AssignOrdering(t0.getNode(), SDNodeOrder); - DAG.AssignOrdering(t1.getNode(), SDNodeOrder); - DAG.AssignOrdering(t2.getNode(), SDNodeOrder); - DAG.AssignOrdering(t3.getNode(), SDNodeOrder); - DAG.AssignOrdering(t4.getNode(), SDNodeOrder); - DAG.AssignOrdering(t5.getNode(), SDNodeOrder); - DAG.AssignOrdering(t6.getNode(), SDNodeOrder); - DAG.AssignOrdering(LogOfMantissa.getNode(), SDNodeOrder); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 // For floating-point precision of 18: // @@ -3424,27 +3175,12 @@ SelectionDAGBuilder::visitLog(CallInst &I) { result = DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa); - - DAG.AssignOrdering(t0.getNode(), SDNodeOrder); - DAG.AssignOrdering(t1.getNode(), SDNodeOrder); - DAG.AssignOrdering(t2.getNode(), SDNodeOrder); - DAG.AssignOrdering(t3.getNode(), SDNodeOrder); - DAG.AssignOrdering(t4.getNode(), SDNodeOrder); - DAG.AssignOrdering(t5.getNode(), SDNodeOrder); - DAG.AssignOrdering(t6.getNode(), SDNodeOrder); - DAG.AssignOrdering(t7.getNode(), SDNodeOrder); - DAG.AssignOrdering(t8.getNode(), SDNodeOrder); - DAG.AssignOrdering(t9.getNode(), SDNodeOrder); - DAG.AssignOrdering(t10.getNode(), SDNodeOrder); - DAG.AssignOrdering(LogOfMantissa.getNode(), SDNodeOrder); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } } else { 
// No special expansion. result = DAG.getNode(ISD::FLOG, dl, getValue(I.getOperand(1)).getValueType(), getValue(I.getOperand(1))); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } setValue(&I, result); @@ -3462,13 +3198,9 @@ SelectionDAGBuilder::visitLog2(CallInst &I) { SDValue Op = getValue(I.getOperand(1)); SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); - DAG.AssignOrdering(Op1.getNode(), SDNodeOrder); - // Get the exponent. SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl, SDNodeOrder); - DAG.AssignOrdering(LogOfExponent.getNode(), SDNodeOrder); - // Get the significand and build it into a floating-point number with // exponent of 1. SDValue X = GetSignificand(DAG, Op1, dl, SDNodeOrder); @@ -3491,12 +3223,6 @@ SelectionDAGBuilder::visitLog2(CallInst &I) { result = DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa); - - DAG.AssignOrdering(t0.getNode(), SDNodeOrder); - DAG.AssignOrdering(t1.getNode(), SDNodeOrder); - DAG.AssignOrdering(t2.getNode(), SDNodeOrder); - DAG.AssignOrdering(Log2ofMantissa.getNode(), SDNodeOrder); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { // For floating-point precision of 12: // @@ -3523,16 +3249,6 @@ SelectionDAGBuilder::visitLog2(CallInst &I) { result = DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa); - - DAG.AssignOrdering(t0.getNode(), SDNodeOrder); - DAG.AssignOrdering(t1.getNode(), SDNodeOrder); - DAG.AssignOrdering(t2.getNode(), SDNodeOrder); - DAG.AssignOrdering(t3.getNode(), SDNodeOrder); - DAG.AssignOrdering(t4.getNode(), SDNodeOrder); - DAG.AssignOrdering(t5.getNode(), SDNodeOrder); - DAG.AssignOrdering(t6.getNode(), SDNodeOrder); - DAG.AssignOrdering(Log2ofMantissa.getNode(), SDNodeOrder); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 // For floating-point precision of 18: // @@ -3568,27 +3284,12 @@ SelectionDAGBuilder::visitLog2(CallInst &I) { result = DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa); - - DAG.AssignOrdering(t0.getNode(), SDNodeOrder); - DAG.AssignOrdering(t1.getNode(), SDNodeOrder); - DAG.AssignOrdering(t2.getNode(), SDNodeOrder); - DAG.AssignOrdering(t3.getNode(), SDNodeOrder); - DAG.AssignOrdering(t4.getNode(), SDNodeOrder); - DAG.AssignOrdering(t5.getNode(), SDNodeOrder); - DAG.AssignOrdering(t6.getNode(), SDNodeOrder); - DAG.AssignOrdering(t7.getNode(), SDNodeOrder); - DAG.AssignOrdering(t8.getNode(), SDNodeOrder); - DAG.AssignOrdering(t9.getNode(), SDNodeOrder); - DAG.AssignOrdering(t10.getNode(), SDNodeOrder); - DAG.AssignOrdering(Log2ofMantissa.getNode(), SDNodeOrder); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } } else { // No special expansion. result = DAG.getNode(ISD::FLOG2, dl, getValue(I.getOperand(1)).getValueType(), getValue(I.getOperand(1))); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } setValue(&I, result); @@ -3606,15 +3307,11 @@ SelectionDAGBuilder::visitLog10(CallInst &I) { SDValue Op = getValue(I.getOperand(1)); SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); - DAG.AssignOrdering(Op1.getNode(), SDNodeOrder); - // Scale the exponent by log10(2) [0.30102999f]. 
SDValue Exp = GetExponent(DAG, Op1, TLI, dl, SDNodeOrder); SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, getF32Constant(DAG, 0x3e9a209a)); - DAG.AssignOrdering(LogOfExponent.getNode(), SDNodeOrder); - // Get the significand and build it into a floating-point number with // exponent of 1. SDValue X = GetSignificand(DAG, Op1, dl, SDNodeOrder); @@ -3637,12 +3334,6 @@ SelectionDAGBuilder::visitLog10(CallInst &I) { result = DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa); - - DAG.AssignOrdering(t0.getNode(), SDNodeOrder); - DAG.AssignOrdering(t1.getNode(), SDNodeOrder); - DAG.AssignOrdering(t2.getNode(), SDNodeOrder); - DAG.AssignOrdering(Log10ofMantissa.getNode(), SDNodeOrder); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { // For floating-point precision of 12: // @@ -3665,14 +3356,6 @@ SelectionDAGBuilder::visitLog10(CallInst &I) { result = DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa); - - DAG.AssignOrdering(t0.getNode(), SDNodeOrder); - DAG.AssignOrdering(t1.getNode(), SDNodeOrder); - DAG.AssignOrdering(t2.getNode(), SDNodeOrder); - DAG.AssignOrdering(t3.getNode(), SDNodeOrder); - DAG.AssignOrdering(t4.getNode(), SDNodeOrder); - DAG.AssignOrdering(Log10ofMantissa.getNode(), SDNodeOrder); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 // For floating-point precision of 18: // @@ -3703,25 +3386,12 @@ SelectionDAGBuilder::visitLog10(CallInst &I) { result = DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa); - - DAG.AssignOrdering(t0.getNode(), SDNodeOrder); - DAG.AssignOrdering(t1.getNode(), SDNodeOrder); - DAG.AssignOrdering(t2.getNode(), SDNodeOrder); - DAG.AssignOrdering(t3.getNode(), SDNodeOrder); - DAG.AssignOrdering(t4.getNode(), SDNodeOrder); - DAG.AssignOrdering(t5.getNode(), SDNodeOrder); - DAG.AssignOrdering(t6.getNode(), SDNodeOrder); - DAG.AssignOrdering(t7.getNode(), SDNodeOrder); - DAG.AssignOrdering(t8.getNode(), SDNodeOrder); - DAG.AssignOrdering(Log10ofMantissa.getNode(), SDNodeOrder); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } } else { // No special expansion. 
result = DAG.getNode(ISD::FLOG10, dl, getValue(I.getOperand(1)).getValueType(), getValue(I.getOperand(1))); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } setValue(&I, result); @@ -3740,8 +3410,6 @@ SelectionDAGBuilder::visitExp2(CallInst &I) { SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op); - DAG.AssignOrdering(IntegerPartOfX.getNode(), SDNodeOrder); - // FractionalPartOfX = x - (float)IntegerPartOfX; SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, Op, t1); @@ -3750,10 +3418,6 @@ SelectionDAGBuilder::visitExp2(CallInst &I) { IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, DAG.getConstant(23, TLI.getPointerTy())); - DAG.AssignOrdering(t1.getNode(), SDNodeOrder); - DAG.AssignOrdering(X.getNode(), SDNodeOrder); - DAG.AssignOrdering(IntegerPartOfX.getNode(), SDNodeOrder); - if (LimitFloatPrecision <= 6) { // For floating-point precision of 6: // @@ -3775,14 +3439,6 @@ SelectionDAGBuilder::visitExp2(CallInst &I) { result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, TwoToFractionalPartOfX); - - DAG.AssignOrdering(t2.getNode(), SDNodeOrder); - DAG.AssignOrdering(t3.getNode(), SDNodeOrder); - DAG.AssignOrdering(t4.getNode(), SDNodeOrder); - DAG.AssignOrdering(t5.getNode(), SDNodeOrder); - DAG.AssignOrdering(t6.getNode(), SDNodeOrder); - DAG.AssignOrdering(TwoToFractionalPartOfX.getNode(), SDNodeOrder); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { // For floating-point precision of 12: // @@ -3808,16 +3464,6 @@ SelectionDAGBuilder::visitExp2(CallInst &I) { result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, TwoToFractionalPartOfX); - - DAG.AssignOrdering(t2.getNode(), SDNodeOrder); - DAG.AssignOrdering(t3.getNode(), SDNodeOrder); - DAG.AssignOrdering(t4.getNode(), SDNodeOrder); - DAG.AssignOrdering(t5.getNode(), SDNodeOrder); - DAG.AssignOrdering(t6.getNode(), SDNodeOrder); - DAG.AssignOrdering(t7.getNode(), SDNodeOrder); - DAG.AssignOrdering(t8.getNode(), SDNodeOrder); - DAG.AssignOrdering(TwoToFractionalPartOfX.getNode(), SDNodeOrder); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 // For floating-point precision of 18: // @@ -3854,29 +3500,12 @@ SelectionDAGBuilder::visitExp2(CallInst &I) { result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, TwoToFractionalPartOfX); - - DAG.AssignOrdering(t2.getNode(), SDNodeOrder); - DAG.AssignOrdering(t3.getNode(), SDNodeOrder); - DAG.AssignOrdering(t4.getNode(), SDNodeOrder); - DAG.AssignOrdering(t5.getNode(), SDNodeOrder); - DAG.AssignOrdering(t6.getNode(), SDNodeOrder); - DAG.AssignOrdering(t7.getNode(), SDNodeOrder); - DAG.AssignOrdering(t8.getNode(), SDNodeOrder); - DAG.AssignOrdering(t9.getNode(), SDNodeOrder); - DAG.AssignOrdering(t10.getNode(), SDNodeOrder); - DAG.AssignOrdering(t11.getNode(), SDNodeOrder); - DAG.AssignOrdering(t12.getNode(), SDNodeOrder); - DAG.AssignOrdering(t13.getNode(), SDNodeOrder); - DAG.AssignOrdering(t14.getNode(), SDNodeOrder); - DAG.AssignOrdering(TwoToFractionalPartOfX.getNode(), SDNodeOrder); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } } else { // No special expansion. 
result = DAG.getNode(ISD::FEXP2, dl, getValue(I.getOperand(1)).getValueType(), getValue(I.getOperand(1))); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } setValue(&I, result); @@ -3918,17 +3547,10 @@ SelectionDAGBuilder::visitPow(CallInst &I) { SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1); - DAG.AssignOrdering(t0.getNode(), SDNodeOrder); - DAG.AssignOrdering(t1.getNode(), SDNodeOrder); - DAG.AssignOrdering(IntegerPartOfX.getNode(), SDNodeOrder); - DAG.AssignOrdering(X.getNode(), SDNodeOrder); - // IntegerPartOfX <<= 23; IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, DAG.getConstant(23, TLI.getPointerTy())); - DAG.AssignOrdering(IntegerPartOfX.getNode(), SDNodeOrder); - if (LimitFloatPrecision <= 6) { // For floating-point precision of 6: // @@ -3950,14 +3572,6 @@ SelectionDAGBuilder::visitPow(CallInst &I) { result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, TwoToFractionalPartOfX); - - DAG.AssignOrdering(t2.getNode(), SDNodeOrder); - DAG.AssignOrdering(t3.getNode(), SDNodeOrder); - DAG.AssignOrdering(t4.getNode(), SDNodeOrder); - DAG.AssignOrdering(t5.getNode(), SDNodeOrder); - DAG.AssignOrdering(t6.getNode(), SDNodeOrder); - DAG.AssignOrdering(TwoToFractionalPartOfX.getNode(), SDNodeOrder); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { // For floating-point precision of 12: // @@ -3983,16 +3597,6 @@ SelectionDAGBuilder::visitPow(CallInst &I) { result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, TwoToFractionalPartOfX); - - DAG.AssignOrdering(t2.getNode(), SDNodeOrder); - DAG.AssignOrdering(t3.getNode(), SDNodeOrder); - DAG.AssignOrdering(t4.getNode(), SDNodeOrder); - DAG.AssignOrdering(t5.getNode(), SDNodeOrder); - DAG.AssignOrdering(t6.getNode(), SDNodeOrder); - DAG.AssignOrdering(t7.getNode(), SDNodeOrder); - DAG.AssignOrdering(t8.getNode(), SDNodeOrder); - DAG.AssignOrdering(TwoToFractionalPartOfX.getNode(), SDNodeOrder); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 // For floating-point precision of 18: // @@ -4029,22 +3633,6 @@ SelectionDAGBuilder::visitPow(CallInst &I) { result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, TwoToFractionalPartOfX); - - DAG.AssignOrdering(t2.getNode(), SDNodeOrder); - DAG.AssignOrdering(t3.getNode(), SDNodeOrder); - DAG.AssignOrdering(t4.getNode(), SDNodeOrder); - DAG.AssignOrdering(t5.getNode(), SDNodeOrder); - DAG.AssignOrdering(t6.getNode(), SDNodeOrder); - DAG.AssignOrdering(t7.getNode(), SDNodeOrder); - DAG.AssignOrdering(t8.getNode(), SDNodeOrder); - DAG.AssignOrdering(t9.getNode(), SDNodeOrder); - DAG.AssignOrdering(t10.getNode(), SDNodeOrder); - DAG.AssignOrdering(t11.getNode(), SDNodeOrder); - DAG.AssignOrdering(t12.getNode(), SDNodeOrder); - DAG.AssignOrdering(t13.getNode(), SDNodeOrder); - DAG.AssignOrdering(t14.getNode(), SDNodeOrder); - DAG.AssignOrdering(TwoToFractionalPartOfX.getNode(), SDNodeOrder); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } } else { // No special expansion. 
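[All of these DAG.AssignOrdering deletions share one rationale: rather than stamping every intermediate SDValue by hand, and routinely missing some, the builder now assigns the current SDNodeOrder in a single place per visited instruction. The declaration of that helper, AssignOrderingToNode, appears in the SelectionDAGBuilder.h hunk further down. Its body is not part of this excerpt; a plausible sketch, assuming DAG.GetOrdering returns 0 for nodes that have not been stamped yet:]

    // Hedged sketch only: stamp a node with the current source order,
    // then recurse into operands so the whole subtree built for one IR
    // instruction gets the same order. The exact body is an assumption.
    void SelectionDAGBuilder::AssignOrderingToNode(const SDNode *Node) {
      if (!Node) return;
      if (DAG.GetOrdering(Node)) return;       // already stamped
      DAG.AssignOrdering(Node, SDNodeOrder);   // the one remaining call site
      for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I)
        AssignOrderingToNode(Node->getOperand(I).getNode());
    }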
@@ -4052,7 +3640,6 @@ SelectionDAGBuilder::visitPow(CallInst &I) { getValue(I.getOperand(1)).getValueType(), getValue(I.getOperand(1)), getValue(I.getOperand(2))); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } setValue(&I, result); @@ -4129,16 +3716,12 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { case Intrinsic::vaend: visitVAEnd(I); return 0; case Intrinsic::vacopy: visitVACopy(I); return 0; case Intrinsic::returnaddress: - Res = DAG.getNode(ISD::RETURNADDR, dl, TLI.getPointerTy(), - getValue(I.getOperand(1))); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::RETURNADDR, dl, TLI.getPointerTy(), + getValue(I.getOperand(1)))); return 0; case Intrinsic::frameaddress: - Res = DAG.getNode(ISD::FRAMEADDR, dl, TLI.getPointerTy(), - getValue(I.getOperand(1))); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::FRAMEADDR, dl, TLI.getPointerTy(), + getValue(I.getOperand(1)))); return 0; case Intrinsic::setjmp: return "_setjmp"+!TLI.usesUnderscoreSetJmp(); @@ -4149,10 +3732,8 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { SDValue Op2 = getValue(I.getOperand(2)); SDValue Op3 = getValue(I.getOperand(3)); unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue(); - Res = DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, false, - I.getOperand(1), 0, I.getOperand(2), 0); - DAG.setRoot(Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, false, + I.getOperand(1), 0, I.getOperand(2), 0)); return 0; } case Intrinsic::memset: { @@ -4160,10 +3741,8 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { SDValue Op2 = getValue(I.getOperand(2)); SDValue Op3 = getValue(I.getOperand(3)); unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue(); - Res = DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, - I.getOperand(1), 0); - DAG.setRoot(Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, + I.getOperand(1), 0)); return 0; } case Intrinsic::memmove: { @@ -4179,20 +3758,18 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { Size = C->getZExtValue(); if (AA->alias(I.getOperand(1), Size, I.getOperand(2), Size) == AliasAnalysis::NoAlias) { - Res = DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, false, - I.getOperand(1), 0, I.getOperand(2), 0); - DAG.setRoot(Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, false, + I.getOperand(1), 0, I.getOperand(2), 0)); return 0; } - Res = DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, - I.getOperand(1), 0, I.getOperand(2), 0); - DAG.setRoot(Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, + I.getOperand(1), 0, I.getOperand(2), 0)); return 0; } case Intrinsic::dbg_declare: { + // FIXME: currently, we get here only if OptLevel != CodeGenOpt::None. + // The real handling of this intrinsic is in FastISel. if (OptLevel != CodeGenOpt::None) // FIXME: Variable debug info is not supported here. 
return 0; @@ -4205,6 +3782,8 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { MDNode *Variable = DI.getVariable(); Value *Address = DI.getAddress(); + if (!Address) + return 0; if (BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) Address = BCI->getOperand(0); AllocaInst *AI = dyn_cast<AllocaInst>(Address); @@ -4222,6 +3801,39 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { MMI->setVariableDbgInfo(Variable, FI, Dbg); return 0; } + case Intrinsic::dbg_value: { + // FIXME: currently, we get here only if OptLevel != CodeGenOpt::None. + // The real handling of this intrinsic is in FastISel. + if (OptLevel != CodeGenOpt::None) + // FIXME: Variable debug info is not supported here. + return 0; + DwarfWriter *DW = DAG.getDwarfWriter(); + if (!DW) + return 0; + DbgValueInst &DI = cast<DbgValueInst>(I); + if (!DIDescriptor::ValidDebugInfo(DI.getVariable(), CodeGenOpt::None)) + return 0; + + MDNode *Variable = DI.getVariable(); + Value *V = DI.getValue(); + if (!V) + return 0; + if (BitCastInst *BCI = dyn_cast<BitCastInst>(V)) + V = BCI->getOperand(0); + AllocaInst *AI = dyn_cast<AllocaInst>(V); + // Don't handle byval struct arguments or VLAs, for example. + if (!AI) + return 0; + DenseMap<const AllocaInst*, int>::iterator SI = + FuncInfo.StaticAllocaMap.find(AI); + if (SI == FuncInfo.StaticAllocaMap.end()) + return 0; // VLAs. + int FI = SI->second; + if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo()) + if (MDNode *Dbg = DI.getMetadata("dbg")) + MMI->setVariableDbgInfo(Variable, FI, Dbg); + return 0; + } case Intrinsic::eh_exception: { // Insert the EXCEPTIONADDR instruction. assert(CurMBB->isLandingPad() &&"Call to eh.exception not in landing pad!"); @@ -4231,7 +3843,6 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { SDValue Op = DAG.getNode(ISD::EXCEPTIONADDR, dl, VTs, Ops, 1); setValue(&I, Op); DAG.setRoot(Op.getValue(1)); - DAG.AssignOrdering(Op.getNode(), SDNodeOrder); return 0; } @@ -4255,13 +3866,8 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { Ops[0] = getValue(I.getOperand(1)); Ops[1] = getRoot(); SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2); - DAG.setRoot(Op.getValue(1)); - - Res = DAG.getSExtOrTrunc(Op, dl, MVT::i32); - setValue(&I, Res); - DAG.AssignOrdering(Op.getNode(), SDNodeOrder); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getSExtOrTrunc(Op, dl, MVT::i32)); return 0; } @@ -4279,7 +3885,6 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { } setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); return 0; } @@ -4287,13 +3892,11 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { case Intrinsic::eh_return_i64: if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo()) { MMI->setCallsEHReturn(true); - Res = DAG.getNode(ISD::EH_RETURN, dl, - MVT::Other, - getControlRoot(), - getValue(I.getOperand(1)), - getValue(I.getOperand(2))); - DAG.setRoot(Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + DAG.setRoot(DAG.getNode(ISD::EH_RETURN, dl, + MVT::Other, + getControlRoot(), + getValue(I.getOperand(1)), + getValue(I.getOperand(2)))); } else { setValue(&I, DAG.getConstant(0, TLI.getPointerTy())); } @@ -4316,15 +3919,20 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { SDValue FA = DAG.getNode(ISD::FRAMEADDR, dl, TLI.getPointerTy(), DAG.getConstant(0, TLI.getPointerTy())); - Res = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), - 
FA, Offset); - setValue(&I, Res); - DAG.AssignOrdering(CfaArg.getNode(), SDNodeOrder); - DAG.AssignOrdering(Offset.getNode(), SDNodeOrder); - DAG.AssignOrdering(FA.getNode(), SDNodeOrder); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), + FA, Offset)); + return 0; + } + case Intrinsic::eh_sjlj_callsite: { + MachineModuleInfo *MMI = DAG.getMachineModuleInfo(); + ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(1)); + assert(CI && "Non-constant call site value in eh.sjlj.callsite!"); + assert(MMI->getCurrentCallSite() == 0 && "Overlapping call sites!"); + + MMI->setCurrentCallSite(CI->getZExtValue()); return 0; } + case Intrinsic::convertff: case Intrinsic::convertfsi: case Intrinsic::convertfui: @@ -4355,35 +3963,26 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { getValue(I.getOperand(3)), Code); setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); return 0; } case Intrinsic::sqrt: - Res = DAG.getNode(ISD::FSQRT, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1))); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::FSQRT, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1)))); return 0; case Intrinsic::powi: - Res = ExpandPowI(dl, getValue(I.getOperand(1)), getValue(I.getOperand(2)), - DAG); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, ExpandPowI(dl, getValue(I.getOperand(1)), + getValue(I.getOperand(2)), DAG)); return 0; case Intrinsic::sin: - Res = DAG.getNode(ISD::FSIN, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1))); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::FSIN, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1)))); return 0; case Intrinsic::cos: - Res = DAG.getNode(ISD::FCOS, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1))); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::FCOS, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1)))); return 0; case Intrinsic::log: visitLog(I); @@ -4405,9 +4004,7 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { return 0; case Intrinsic::pcmarker: { SDValue Tmp = getValue(I.getOperand(1)); - Res = DAG.getNode(ISD::PCMARKER, dl, MVT::Other, getRoot(), Tmp); - DAG.setRoot(Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + DAG.setRoot(DAG.getNode(ISD::PCMARKER, dl, MVT::Other, getRoot(), Tmp)); return 0; } case Intrinsic::readcyclecounter: { @@ -4417,38 +4014,29 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { &Op, 1); setValue(&I, Res); DAG.setRoot(Res.getValue(1)); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); return 0; } case Intrinsic::bswap: - Res = DAG.getNode(ISD::BSWAP, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1))); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::BSWAP, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1)))); return 0; case Intrinsic::cttz: { SDValue Arg = getValue(I.getOperand(1)); EVT Ty = Arg.getValueType(); - Res = DAG.getNode(ISD::CTTZ, dl, Ty, Arg); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::CTTZ, dl, Ty, Arg)); return 0; } case 
Intrinsic::ctlz: { SDValue Arg = getValue(I.getOperand(1)); EVT Ty = Arg.getValueType(); - Res = DAG.getNode(ISD::CTLZ, dl, Ty, Arg); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::CTLZ, dl, Ty, Arg)); return 0; } case Intrinsic::ctpop: { SDValue Arg = getValue(I.getOperand(1)); EVT Ty = Arg.getValueType(); - Res = DAG.getNode(ISD::CTPOP, dl, Ty, Arg); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::CTPOP, dl, Ty, Arg)); return 0; } case Intrinsic::stacksave: { @@ -4457,14 +4045,11 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { DAG.getVTList(TLI.getPointerTy(), MVT::Other), &Op, 1); setValue(&I, Res); DAG.setRoot(Res.getValue(1)); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); return 0; } case Intrinsic::stackrestore: { Res = getValue(I.getOperand(1)); - Res = DAG.getNode(ISD::STACKRESTORE, dl, MVT::Other, getRoot(), Res); - DAG.setRoot(Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, dl, MVT::Other, getRoot(), Res)); return 0; } case Intrinsic::stackprotector: { @@ -4484,10 +4069,9 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { // Store the stack protector onto the stack. Res = DAG.getStore(getRoot(), getCurDebugLoc(), Src, FIN, PseudoSourceValue::getFixedStack(FI), - 0, true); + 0, true, false, 0); setValue(&I, Res); DAG.setRoot(Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); return 0; } case Intrinsic::objectsize: { @@ -4505,7 +4089,6 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { Res = DAG.getConstant(0, Ty); setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); return 0; } case Intrinsic::var_annotation: @@ -4529,7 +4112,6 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { setValue(&I, Res); DAG.setRoot(Res.getValue(1)); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); return 0; } case Intrinsic::gcroot: @@ -4546,14 +4128,10 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!"); return 0; case Intrinsic::flt_rounds: - Res = DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32)); return 0; case Intrinsic::trap: - Res = DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot()); - DAG.setRoot(Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot())); return 0; case Intrinsic::uadd_with_overflow: return implVisitAluOverflow(I, ISD::UADDO); @@ -4574,9 +4152,7 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { Ops[1] = getValue(I.getOperand(1)); Ops[2] = getValue(I.getOperand(2)); Ops[3] = getValue(I.getOperand(3)); - Res = DAG.getNode(ISD::PREFETCH, dl, MVT::Other, &Ops[0], 4); - DAG.setRoot(Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + DAG.setRoot(DAG.getNode(ISD::PREFETCH, dl, MVT::Other, &Ops[0], 4)); return 0; } @@ -4586,9 +4162,7 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { for (int x = 1; x < 6; ++x) Ops[x] = getValue(I.getOperand(x)); - Res = DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, &Ops[0], 6); - DAG.setRoot(Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + DAG.setRoot(DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, &Ops[0], 6)); return 0; 
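[A second mechanical change threads through these intrinsic hunks: getStore and getLoad grew an isNonTemporal flag, so call sites gain an extra false (or a trailing "false, 0") between the volatile flag and the alignment, as in the stack-protector store above. Spelled out with the stack-protector operands, this is a fragment in the builder's context, with the parameter names reconstructed from the call sites rather than quoted from a header:]

    // Assumed shape after this patch:
    //   getStore(Chain, dl, Val, Ptr, SrcValue, SVOffset,
    //            isVolatile, isNonTemporal, Alignment)
    SDValue Store = DAG.getStore(getRoot(), getCurDebugLoc(), Src, FIN,
                                 PseudoSourceValue::getFixedStack(FI),
                                 /*SVOffset=*/0,
                                 /*isVolatile=*/true,
                                 /*isNonTemporal=*/false,
                                 /*Alignment=*/0);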
} case Intrinsic::atomic_cmp_swap: { @@ -4603,7 +4177,6 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { I.getOperand(1)); setValue(&I, L); DAG.setRoot(L.getValue(1)); - DAG.AssignOrdering(L.getNode(), SDNodeOrder); return 0; } case Intrinsic::atomic_load_add: @@ -4632,9 +4205,7 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { case Intrinsic::invariant_start: case Intrinsic::lifetime_start: // Discard region information. - Res = DAG.getUNDEF(TLI.getPointerTy()); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getUNDEF(TLI.getPointerTy())); return 0; case Intrinsic::invariant_end: case Intrinsic::lifetime_end: @@ -4649,19 +4220,25 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { /// between it and the return. /// /// This function only tests target-independent requirements. -/// For target-dependent requirements, a target should override -/// TargetLowering::IsEligibleForTailCallOptimization. -/// static bool -isInTailCallPosition(const Instruction *I, Attributes CalleeRetAttr, +isInTailCallPosition(CallSite CS, Attributes CalleeRetAttr, const TargetLowering &TLI) { + const Instruction *I = CS.getInstruction(); const BasicBlock *ExitBB = I->getParent(); const TerminatorInst *Term = ExitBB->getTerminator(); const ReturnInst *Ret = dyn_cast<ReturnInst>(Term); const Function *F = ExitBB->getParent(); - // The block must end in a return statement or an unreachable. - if (!Ret && !isa<UnreachableInst>(Term)) return false; + // The block must end in a return statement or unreachable. + // + // FIXME: Decline tailcall if it's not guaranteed and if the block ends in + // an unreachable, for now. The way tailcall optimization is currently + // implemented means it will add an epilogue followed by a jump. That is + // not profitable. Also, if the callee is a special function (e.g. + // longjmp on x86), it can end up causing miscompilation that has not + // been fully understood. + if (!Ret && + (!GuaranteedTailCallOpt || !isa<UnreachableInst>(Term))) return false; // If I will have a chain, make sure no other instruction that will have a // chain interposes between I and the return. @@ -4690,6 +4267,10 @@ isInTailCallPosition(const Instruction *I, Attributes CalleeRetAttr, if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias) return false; + // It's not safe to eliminate the sign / zero extension of the return value. + if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt)) + return false; + // Otherwise, make sure the unmodified return value of I is the return value. for (const Instruction *U = dyn_cast<Instruction>(Ret->getOperand(0)); ; U = dyn_cast<Instruction>(U->getOperand(0))) { @@ -4785,6 +4366,15 @@ void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee, // used to detect deletion of the invoke via the MachineModuleInfo. BeginLabel = MMI->NextLabelID(); + // For SjLj, keep track of which landing pads go with which invokes + // so as to maintain the ordering of pads in the LSDA. + unsigned CallSiteIndex = MMI->getCurrentCallSite(); + if (CallSiteIndex) { + MMI->setCallSiteBeginLabel(BeginLabel, CallSiteIndex); + // Now that the call site is handled, stop tracking it. + MMI->setCurrentCallSite(0); + } + // Both PendingLoads and PendingExports must be flushed here; // this call might not return. 
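[The isInTailCallPosition rewrite above tightens two things: a call followed by unreachable is tail-call eligible only under GuaranteedTailCallOpt, and a caller whose return is attributed zeroext/signext may not tail call at all, because jumping straight to the callee would skip the extension the caller promised. The attribute logic in isolation, distilled from the hunk:]

    // Attributes is the 2.x-era bitmask type used in the hunk above.
    static bool retAttrsAllowTailCall(Attributes CallerRetAttr,
                                      Attributes CalleeRetAttr) {
      // Caller and callee must agree on return-value attributes,
      // modulo noalias, or the transformation changes behavior.
      if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
        return false;
      // A promised zeroext/signext return cannot be produced by the
      // callee's raw return value, so decline.
      if ((CallerRetAttr & Attribute::ZExt) ||
          (CallerRetAttr & Attribute::SExt))
        return false;
      return true;
    }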
(void)getRoot(); @@ -4795,9 +4385,7 @@ void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee, // Check if target-independent constraints permit a tail call here. // Target-dependent constraints are checked within TLI.LowerCallTo. if (isTailCall && - !isInTailCallPosition(CS.getInstruction(), - CS.getAttributes().getRetAttributes(), - TLI)) + !isInTailCallPosition(CS, CS.getAttributes().getRetAttributes(), TLI)) isTailCall = false; std::pair<SDValue,SDValue> Result = @@ -4815,7 +4403,6 @@ void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee, "Null value expected with tail call!"); if (Result.first.getNode()) { setValue(CS.getInstruction(), Result.first); - DAG.AssignOrdering(Result.first.getNode(), SDNodeOrder); } else if (!CanLowerReturn && Result.second.getNode()) { // The instruction result is the result of loading from the // hidden sret parameter. @@ -4834,7 +4421,7 @@ void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee, DemoteStackSlot, DAG.getConstant(Offsets[i], PtrVT)); SDValue L = DAG.getLoad(OutVTs[i], getCurDebugLoc(), Result.second, - Add, NULL, Offsets[i], false, 1); + Add, NULL, Offsets[i], false, false, 1); Values[i] = L; Chains[i] = L.getValue(1); } @@ -4860,27 +4447,22 @@ void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee, getCopyFromParts(DAG, getCurDebugLoc(), SDNodeOrder, &Values[CurReg], NumRegs, RegisterVT, VT, AssertOp); ReturnValues.push_back(ReturnValue); - DAG.AssignOrdering(ReturnValue.getNode(), SDNodeOrder); CurReg += NumRegs; } - SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), - DAG.getVTList(&RetTys[0], RetTys.size()), - &ReturnValues[0], ReturnValues.size()); - setValue(CS.getInstruction(), Res); + setValue(CS.getInstruction(), + DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + DAG.getVTList(&RetTys[0], RetTys.size()), + &ReturnValues[0], ReturnValues.size())); - DAG.AssignOrdering(Chain.getNode(), SDNodeOrder); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); } // As a special case, a null chain means that a tail call has been emitted and // the DAG root is already updated. - if (Result.second.getNode()) { + if (Result.second.getNode()) DAG.setRoot(Result.second); - DAG.AssignOrdering(Result.second.getNode(), SDNodeOrder); - } else { + else HasTailCall = true; - } if (LandingPad && MMI) { // Insert a label at the end of the invoke call to mark the try range. This @@ -4941,7 +4523,8 @@ static SDValue getMemCmpLoad(Value *PtrVal, MVT LoadVT, const Type *LoadTy, SDValue Ptr = Builder.getValue(PtrVal); SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root, Ptr, PtrVal /*SrcValue*/, 0/*SVOffset*/, - false /*volatile*/, 1 /* align=1 */); + false /*volatile*/, + false /*nontemporal*/, 1 /* align=1 */); if (!ConstantMemory) Builder.PendingLoads.push_back(LoadVal.getValue(1)); @@ -5054,7 +4637,7 @@ void SelectionDAGBuilder::visitCall(CallInst &I) { StringRef Name = F->getName(); if (Name == "copysign" || Name == "copysignf") { if (I.getNumOperands() == 3 && // Basic sanity checks. - I.getOperand(1)->getType()->isFloatingPoint() && + I.getOperand(1)->getType()->isFloatingPointTy() && I.getType() == I.getOperand(1)->getType() && I.getType() == I.getOperand(2)->getType()) { SDValue LHS = getValue(I.getOperand(1)); @@ -5065,7 +4648,7 @@ void SelectionDAGBuilder::visitCall(CallInst &I) { } } else if (Name == "fabs" || Name == "fabsf" || Name == "fabsl") { if (I.getNumOperands() == 2 && // Basic sanity checks. 
- I.getOperand(1)->getType()->isFloatingPoint() && + I.getOperand(1)->getType()->isFloatingPointTy() && I.getType() == I.getOperand(1)->getType()) { SDValue Tmp = getValue(I.getOperand(1)); setValue(&I, DAG.getNode(ISD::FABS, getCurDebugLoc(), @@ -5074,7 +4657,7 @@ void SelectionDAGBuilder::visitCall(CallInst &I) { } } else if (Name == "sin" || Name == "sinf" || Name == "sinl") { if (I.getNumOperands() == 2 && // Basic sanity checks. - I.getOperand(1)->getType()->isFloatingPoint() && + I.getOperand(1)->getType()->isFloatingPointTy() && I.getType() == I.getOperand(1)->getType() && I.onlyReadsMemory()) { SDValue Tmp = getValue(I.getOperand(1)); @@ -5084,7 +4667,7 @@ void SelectionDAGBuilder::visitCall(CallInst &I) { } } else if (Name == "cos" || Name == "cosf" || Name == "cosl") { if (I.getNumOperands() == 2 && // Basic sanity checks. - I.getOperand(1)->getType()->isFloatingPoint() && + I.getOperand(1)->getType()->isFloatingPointTy() && I.getType() == I.getOperand(1)->getType() && I.onlyReadsMemory()) { SDValue Tmp = getValue(I.getOperand(1)); @@ -5094,7 +4677,7 @@ void SelectionDAGBuilder::visitCall(CallInst &I) { } } else if (Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") { if (I.getNumOperands() == 2 && // Basic sanity checks. - I.getOperand(1)->getType()->isFloatingPoint() && + I.getOperand(1)->getType()->isFloatingPointTy() && I.getType() == I.getOperand(1)->getType() && I.onlyReadsMemory()) { SDValue Tmp = getValue(I.getOperand(1)); @@ -5120,9 +4703,7 @@ void SelectionDAGBuilder::visitCall(CallInst &I) { // Check if we can potentially perform a tail call. More detailed checking is // be done within LowerCallTo, after more information about the call is known. - bool isTailCall = PerformTailCallOpt && I.isTailCall(); - - LowerCallTo(&I, Callee, isTailCall); + LowerCallTo(&I, Callee, I.isTailCall()); } /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from @@ -5152,7 +4733,6 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl, } Chain = P.getValue(1); - DAG.AssignOrdering(P.getNode(), Order); // If the source register was virtual and if we know something about it, // add an assert node. @@ -5188,11 +4768,9 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl, else if (NumZeroBits >= RegSize-32) isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32 - if (FromVT != MVT::Other) { + if (FromVT != MVT::Other) P = DAG.getNode(isSExt ? 
ISD::AssertSext : ISD::AssertZext, dl, RegisterVT, P, DAG.getValueType(FromVT)); - DAG.AssignOrdering(P.getNode(), Order); - } } } @@ -5201,16 +4779,13 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl, Values[Value] = getCopyFromParts(DAG, dl, Order, Parts.begin(), NumRegs, RegisterVT, ValueVT); - DAG.AssignOrdering(Values[Value].getNode(), Order); Part += NumRegs; Parts.clear(); } - SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl, - DAG.getVTList(&ValueVTs[0], ValueVTs.size()), - &Values[0], ValueVTs.size()); - DAG.AssignOrdering(Res.getNode(), Order); - return Res; + return DAG.getNode(ISD::MERGE_VALUES, dl, + DAG.getVTList(&ValueVTs[0], ValueVTs.size()), + &Values[0], ValueVTs.size()); } /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the @@ -5246,7 +4821,6 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, } Chains[i] = Part.getValue(0); - DAG.AssignOrdering(Part.getNode(), Order); } if (NumRegs == 1 || Flag) @@ -5263,8 +4837,6 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, Chain = Chains[NumRegs-1]; else Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], NumRegs); - - DAG.AssignOrdering(Chain.getNode(), Order); } /// AddInlineAsmOperands - Add this value to the specified inlineasm node @@ -5281,16 +4853,12 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, SDValue Res = DAG.getTargetConstant(Flag, MVT::i32); Ops.push_back(Res); - DAG.AssignOrdering(Res.getNode(), Order); - for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) { unsigned NumRegs = TLI->getNumRegisters(*DAG.getContext(), ValueVTs[Value]); EVT RegisterVT = RegVTs[Value]; for (unsigned i = 0; i != NumRegs; ++i) { assert(Reg < Regs.size() && "Mismatch in # registers expected"); - SDValue Res = DAG.getRegister(Regs[Reg++], RegisterVT); - Ops.push_back(Res); - DAG.AssignOrdering(Res.getNode(), Order); + Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT)); } } } @@ -5309,7 +4877,7 @@ isAllocatableRegister(unsigned Reg, MachineFunction &MF, EVT ThisVT = MVT::Other; const TargetRegisterClass *RC = *RCI; - // If none of the the value types for this register class are valid, we + // If none of the value types for this register class are valid, we // can't use it. For example, 64-bit reg classes on 32-bit targets. for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); I != E; ++I) { @@ -5509,8 +5077,6 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, RegVT, OpInfo.CallOperand); OpInfo.ConstraintVT = RegVT; } - - DAG.AssignOrdering(OpInfo.CallOperand.getNode(), SDNodeOrder); } NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT); @@ -5776,7 +5342,8 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); Chain = DAG.getStore(Chain, getCurDebugLoc(), - OpInfo.CallOperand, StackSlot, NULL, 0); + OpInfo.CallOperand, StackSlot, NULL, 0, + false, false, 0); OpInfo.CallOperand = StackSlot; } @@ -5972,7 +5539,8 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { "Don't know how to handle indirect register inputs yet!"); // Copy the input into the appropriate registers. 
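[Earlier in the visitInlineAsm hunks, indirect memory operands get an address materialized for them: a by-value operand is stored into a fresh stack object (note the widened getStore signature again) and the frame index replaces it as the asm operand. Condensed from the hunk, with comments added:]

    int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
    SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
    // Spill the value; the asm only knows how to take an address here.
    Chain = DAG.getStore(Chain, getCurDebugLoc(), OpInfo.CallOperand,
                         StackSlot, NULL, 0, false, false, 0);
    OpInfo.CallOperand = StackSlot;   // the asm now sees memory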
- if (OpInfo.AssignedRegs.Regs.empty()) { + if (OpInfo.AssignedRegs.Regs.empty() || + !OpInfo.AssignedRegs.areValueTypesLegal()) { llvm_report_error("Couldn't allocate input reg for" " constraint '"+ OpInfo.ConstraintCode +"'!"); } @@ -6061,7 +5629,8 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { SDValue Val = DAG.getStore(Chain, getCurDebugLoc(), StoresToEmit[i].first, getValue(StoresToEmit[i].second), - StoresToEmit[i].second, 0); + StoresToEmit[i].second, 0, + false, false, 0); OutChains.push_back(Val); } @@ -6116,9 +5685,6 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, SDValue Callee, ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl, unsigned Order) { - assert((!isTailCall || PerformTailCallOpt) && - "isTailCall set when tail-call optimizations are disabled!"); - // Handle all of the outgoing arguments. SmallVector<ISD::OutputArg, 32> Outs; for (unsigned i = 0, e = Args.size(); i != e; ++i) { @@ -6207,12 +5773,6 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, } } - // Check if target-dependent constraints permit a tail call here. - // Target-independent constraints should be checked by the caller. - if (isTailCall && - !IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, Ins, DAG)) - isTailCall = false; - SmallVector<SDValue, 4> InVals; Chain = LowerCall(Chain, Callee, CallConv, isVarArg, isTailCall, Outs, Ins, dl, DAG, InVals); @@ -6231,8 +5791,6 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, "LowerCall emitted a value with the wrong type!"); }); - DAG.AssignOrdering(Chain.getNode(), Order); - // For a tail call, the return value is merely live-out and there aren't // any nodes in the DAG representing it. Return a special value to // indicate that a tail call has been emitted and no more Instructions @@ -6256,11 +5814,9 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, EVT RegisterVT = getRegisterType(RetTy->getContext(), VT); unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT); - SDValue ReturnValue = - getCopyFromParts(DAG, dl, Order, &InVals[CurReg], NumRegs, - RegisterVT, VT, AssertOp); - ReturnValues.push_back(ReturnValue); - DAG.AssignOrdering(ReturnValue.getNode(), Order); + ReturnValues.push_back(getCopyFromParts(DAG, dl, Order, &InVals[CurReg], + NumRegs, RegisterVT, VT, + AssertOp)); CurReg += NumRegs; } @@ -6273,7 +5829,6 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(&RetTys[0], RetTys.size()), &ReturnValues[0], ReturnValues.size()); - DAG.AssignOrdering(Res.getNode(), Order); return std::make_pair(Res, Chain); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index db656e3..bc4b33d 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -342,6 +342,11 @@ public: void CopyValueToVirtualRegister(Value *V, unsigned Reg); + /// AssignOrderingToNode - Assign an ordering to the node. The order is gotten + /// from how the code appeared in the source. The ordering is used by the + /// scheduler to effectively turn off scheduling. 
+ void AssignOrderingToNode(const SDNode *Node); + void visit(Instruction &I); void visit(unsigned Opcode, User &I); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 2bec964..eead526 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -457,6 +457,21 @@ public: }; } +/// TrivialTruncElim - Eliminate some trivial nops that can result from +/// ShrinkDemandedOps: (trunc (ext n)) -> n. +static bool TrivialTruncElim(SDValue Op, + TargetLowering::TargetLoweringOpt &TLO) { + SDValue N0 = Op.getOperand(0); + EVT VT = Op.getValueType(); + if ((N0.getOpcode() == ISD::ZERO_EXTEND || + N0.getOpcode() == ISD::SIGN_EXTEND || + N0.getOpcode() == ISD::ANY_EXTEND) && + N0.getOperand(0).getValueType() == VT) { + return TLO.CombineTo(Op, N0.getOperand(0)); + } + return false; +} + /// ShrinkDemandedOps - A late transformation pass that shrink expressions /// using TargetLowering::TargetLoweringOpt::ShrinkDemandedOp. It converts /// x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. @@ -489,7 +504,9 @@ void SelectionDAGISel::ShrinkDemandedOps() { APInt Demanded = APInt::getAllOnesValue(BitWidth); APInt KnownZero, KnownOne; if (TLI.SimplifyDemandedBits(SDValue(N, 0), Demanded, - KnownZero, KnownOne, TLO)) { + KnownZero, KnownOne, TLO) || + (N->getOpcode() == ISD::TRUNCATE && + TrivialTruncElim(SDValue(N, 0), TLO))) { // Revisit the node. Worklist.erase(std::remove(Worklist.begin(), Worklist.end(), N), Worklist.end()); @@ -801,7 +818,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, // landing pad can thus be detected via the MachineModuleInfo. unsigned LabelID = MMI->addLandingPad(BB); - const TargetInstrDesc &II = TII.get(TargetInstrInfo::EH_LABEL); + const TargetInstrDesc &II = TII.get(TargetOpcode::EH_LABEL); BuildMI(BB, SDB->getCurDebugLoc(), II).addImm(LabelID); // Mark exception register as live in. @@ -953,7 +970,7 @@ SelectionDAGISel::FinishBasicBlock() { SDB->BitTestCases.empty()) { for (unsigned i = 0, e = SDB->PHINodesToUpdate.size(); i != e; ++i) { MachineInstr *PHI = SDB->PHINodesToUpdate[i].first; - assert(PHI->getOpcode() == TargetInstrInfo::PHI && + assert(PHI->isPHI() && "This is not a machine PHI node that we are updating!"); PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[i].second, false)); @@ -1000,7 +1017,7 @@ SelectionDAGISel::FinishBasicBlock() { for (unsigned pi = 0, pe = SDB->PHINodesToUpdate.size(); pi != pe; ++pi) { MachineInstr *PHI = SDB->PHINodesToUpdate[pi].first; MachineBasicBlock *PHIBB = PHI->getParent(); - assert(PHI->getOpcode() == TargetInstrInfo::PHI && + assert(PHI->isPHI() && "This is not a machine PHI node that we are updating!"); // This is "default" BB. We have two jumps to it. From "header" BB and // from last "case" BB. @@ -1056,7 +1073,7 @@ SelectionDAGISel::FinishBasicBlock() { for (unsigned pi = 0, pe = SDB->PHINodesToUpdate.size(); pi != pe; ++pi) { MachineInstr *PHI = SDB->PHINodesToUpdate[pi].first; MachineBasicBlock *PHIBB = PHI->getParent(); - assert(PHI->getOpcode() == TargetInstrInfo::PHI && + assert(PHI->isPHI() && "This is not a machine PHI node that we are updating!"); // "default" BB. We can go there only from header BB. if (PHIBB == SDB->JTCases[i].second.Default) { @@ -1079,7 +1096,7 @@ SelectionDAGISel::FinishBasicBlock() { // need to update PHI nodes in that block. 
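[TrivialTruncElim above catches a nop that ShrinkDemandedOps itself creates: once an operation has been narrowed, the extend that fed it may end up consumed by a truncate back to the original type, and (trunc (ext n)) with matching inner and outer types is just n. In scalar terms:]

    #include <cassert>
    #include <cstdint>

    // Extending and truncating through a wider type returns the
    // original value, so the two DAG nodes can be bypassed.
    uint32_t roundTrip(uint32_t n) {
      uint64_t wide = (uint64_t)n;   // (zext i32 n) left by shrinking
      return (uint32_t)wide;         // (trunc i64 wide) == n
    }

    int main() { assert(roundTrip(0xdeadbeefu) == 0xdeadbeefu); }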
for (unsigned i = 0, e = SDB->PHINodesToUpdate.size(); i != e; ++i) { MachineInstr *PHI = SDB->PHINodesToUpdate[i].first; - assert(PHI->getOpcode() == TargetInstrInfo::PHI && + assert(PHI->isPHI() && "This is not a machine PHI node that we are updating!"); if (BB->isSuccessor(PHI->getParent())) { PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[i].second, @@ -1116,7 +1133,7 @@ SelectionDAGISel::FinishBasicBlock() { // BB may have been removed from the CFG if a branch was constant folded. if (ThisBB->isSuccessor(BB)) { for (MachineBasicBlock::iterator Phi = BB->begin(); - Phi != BB->end() && Phi->getOpcode() == TargetInstrInfo::PHI; + Phi != BB->end() && Phi->isPHI(); ++Phi) { // This value for this PHI node is recorded in PHINodesToUpdate. for (unsigned pn = 0; ; ++pn) { @@ -1324,8 +1341,7 @@ static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse, /// isNonImmUse - Start searching from Root up the DAG to check is Def can /// be reached. Return true if that's the case. However, ignore direct uses /// by ImmedUse (which would be U in the example illustrated in -/// IsLegalAndProfitableToFold) and by Root (which can happen in the store -/// case). +/// IsLegalToFold) and by Root (which can happen in the store case). /// FIXME: to be really generic, we should allow direct use by any node /// that is being folded. But realisticly since we only fold loads which /// have one non-chain use, we only need to watch out for load/op/store @@ -1336,11 +1352,17 @@ static inline bool isNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse) { return findNonImmUse(Root, Def, ImmedUse, Root, Visited); } -/// IsLegalAndProfitableToFold - Returns true if the specific operand node N of -/// U can be folded during instruction selection that starts at Root and -/// folding N is profitable. -bool SelectionDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, - SDNode *Root) const { +/// IsProfitableToFold - Returns true if it's profitable to fold the specific +/// operand node N of U during instruction selection that starts at Root. +bool SelectionDAGISel::IsProfitableToFold(SDValue N, SDNode *U, + SDNode *Root) const { + if (OptLevel == CodeGenOpt::None) return false; + return N.hasOneUse(); +} + +/// IsLegalToFold - Returns true if the specific operand node N of +/// U can be folded during instruction selection that starts at Root. 
+bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root) const { if (OptLevel == CodeGenOpt::None) return false; // If Root use can somehow reach N through a path that that doesn't contain @@ -1394,7 +1416,7 @@ bool SelectionDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, VT = Root->getValueType(Root->getNumValues()-1); } - return !isNonImmUse(Root, N, U); + return !isNonImmUse(Root, N.getNode(), U); } SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) { @@ -1410,15 +1432,14 @@ SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) { } SDNode *SelectionDAGISel::Select_UNDEF(SDNode *N) { - return CurDAG->SelectNodeTo(N, TargetInstrInfo::IMPLICIT_DEF, - N->getValueType(0)); + return CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF,N->getValueType(0)); } SDNode *SelectionDAGISel::Select_EH_LABEL(SDNode *N) { SDValue Chain = N->getOperand(0); unsigned C = cast<LabelSDNode>(N)->getLabelID(); SDValue Tmp = CurDAG->getTargetConstant(C, MVT::i32); - return CurDAG->SelectNodeTo(N, TargetInstrInfo::EH_LABEL, + return CurDAG->SelectNodeTo(N, TargetOpcode::EH_LABEL, MVT::Other, Tmp, Chain); } diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 81c51c4..e88af4f 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -13,6 +13,7 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCExpr.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" @@ -21,6 +22,8 @@ #include "llvm/GlobalVariable.h" #include "llvm/DerivedTypes.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/ErrorHandling.h" @@ -507,7 +510,6 @@ TargetLowering::TargetLowering(TargetMachine &tm,TargetLoweringObjectFile *tlof) setOperationAction(ISD::TRAP, MVT::Other, Expand); IsLittleEndian = TD->isLittleEndian(); - UsesGlobalOffsetTable = false; ShiftAmountTy = PointerTy = MVT::getIntegerVT(8*TD->getPointerSize()); memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*)); memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray)); @@ -538,6 +540,24 @@ TargetLowering::~TargetLowering() { delete &TLOF; } +/// canOpTrap - Returns true if the operation can trap for the value type. +/// VT must be a legal type. 
+bool TargetLowering::canOpTrap(unsigned Op, EVT VT) const { + assert(isTypeLegal(VT)); + switch (Op) { + default: + return false; + case ISD::FDIV: + case ISD::FREM: + case ISD::SDIV: + case ISD::UDIV: + case ISD::SREM: + case ISD::UREM: + return true; + } +} + + static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, unsigned &NumIntermediates, EVT &RegisterVT, @@ -682,7 +702,7 @@ void TargetLowering::computeRegisterProperties() { for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { EVT SVT = (MVT::SimpleValueType)nVT; if (isTypeLegal(SVT) && SVT.getVectorElementType() == EltVT && - SVT.getVectorNumElements() > NElts) { + SVT.getVectorNumElements() > NElts && NElts != 1) { TransformToType[i] = SVT; ValueTypeActions.setTypeAction(VT, Promote); IsLegalWiderType = true; @@ -793,13 +813,40 @@ unsigned TargetLowering::getByValTypeAlignment(const Type *Ty) const { return TD->getCallFrameTypeAlignment(Ty); } +/// getJumpTableEncoding - Return the entry encoding for a jump table in the +/// current function. The returned value is a member of the +/// MachineJumpTableInfo::JTEntryKind enum. +unsigned TargetLowering::getJumpTableEncoding() const { + // In non-pic modes, just use the address of a block. + if (getTargetMachine().getRelocationModel() != Reloc::PIC_) + return MachineJumpTableInfo::EK_BlockAddress; + + // In PIC mode, if the target supports a GPRel32 directive, use it. + if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != 0) + return MachineJumpTableInfo::EK_GPRel32BlockAddress; + + // Otherwise, use a label difference. + return MachineJumpTableInfo::EK_LabelDifference32; +} + SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const { - if (usesGlobalOffsetTable()) + // If our PIC model is GP relative, use the global offset table as the base. + if (getJumpTableEncoding() == MachineJumpTableInfo::EK_GPRel32BlockAddress) return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy()); return Table; } +/// getPICJumpTableRelocBaseExpr - This returns the relocation base for the +/// given PIC jumptable, the same as getPICJumpTableRelocBase, but as an +/// MCExpr. +const MCExpr * +TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF, + unsigned JTI,MCContext &Ctx) const{ + // The normal PIC reloc base is the label at the start of the jump table. + return MCSymbolRefExpr::Create(MF->getJTISymbol(JTI, Ctx), Ctx); +} + bool TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // Assume that everything is safe in static mode. @@ -1669,7 +1716,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr, Lod->getSrcValue(), Lod->getSrcValueOffset() + bestOffset, - false, NewAlign); + false, false, NewAlign); return DAG.getSetCC(dl, VT, DAG.getNode(ISD::AND, dl, newVT, NewLoad, DAG.getConstant(bestMask.trunc(bestWidth), @@ -2337,7 +2384,7 @@ getRegForInlineAsmConstraint(const std::string &Constraint, E = RI->regclass_end(); RCI != E; ++RCI) { const TargetRegisterClass *RC = *RCI; - // If none of the the value types for this register class are valid, we + // If none of the value types for this register class are valid, we // can't use it. For example, 64-bit reg classes on 32-bit targets. 
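[Two additions in these TargetLowering.cpp hunks deserve a gloss. canOpTrap gives passes a way to ask whether an operation may fault (the divisions and remainders, which can divide by zero), and getJumpTableEncoding replaces the old UsesGlobalOffsetTable flag with a three-way entry-kind policy. A hypothetical caller of the first, assuming TLI is a TargetLowering reference; the helper name is invented for illustration:]

    bool canSpeculate(const TargetLowering &TLI, unsigned Opcode, EVT VT) {
      // canOpTrap asserts the type is legal, so check that first;
      // hoisting a trapping op past control flow is never safe.
      return TLI.isTypeLegal(VT) && !TLI.canOpTrap(Opcode, VT);
    }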
bool isLegal = false; for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp index 27d429b..e7b0cff 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -197,7 +197,7 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, SlotIndex FillerStart = ValLR->end, FillerEnd = BLR->start; // We are about to delete CopyMI, so need to remove it as the 'instruction - // that defines this value #'. Update the the valnum with the new defining + // that defines this value #'. Update the valnum with the new defining // instruction #. BValNo->def = FillerStart; BValNo->setCopy(0); @@ -375,8 +375,9 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, // If some of the uses of IntA.reg is already coalesced away, return false. // It's not possible to determine whether it's safe to perform the coalescing. - for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(IntA.reg), - UE = mri_->use_end(); UI != UE; ++UI) { + for (MachineRegisterInfo::use_nodbg_iterator UI = + mri_->use_nodbg_begin(IntA.reg), + UE = mri_->use_nodbg_end(); UI != UE; ++UI) { MachineInstr *UseMI = &*UI; SlotIndex UseIdx = li_->getInstructionIndex(UseMI); LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); @@ -430,6 +431,12 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, ++UI; if (JoinedCopies.count(UseMI)) continue; + if (UseMI->isDebugValue()) { + // FIXME These don't have an instruction index. Not clear we have enough + // info to decide whether to do this replacement or not. For now do it. + UseMO.setReg(NewReg); + continue; + } SlotIndex UseIdx = li_->getInstructionIndex(UseMI).getUseIndex(); LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); if (ULR == IntA.end() || ULR->valno != AValNo) @@ -659,7 +666,7 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, return false; if (TID.getNumDefs() != 1) return false; - if (DefMI->getOpcode() != TargetInstrInfo::IMPLICIT_DEF) { + if (!DefMI->isImplicitDef()) { // Make sure the copy destination register class fits the instruction // definition register class. The mismatch can happen as a result of earlier // extract_subreg, insert_subreg, subreg_to_reg coalescing. @@ -764,11 +771,16 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, SubIdx = 0; } + // Copy the register use-list before traversing it. We may be adding operands + // and invalidating pointers. + SmallVector<std::pair<MachineInstr*, unsigned>, 32> reglist; for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(SrcReg), - E = mri_->reg_end(); I != E; ) { - MachineOperand &O = I.getOperand(); - MachineInstr *UseMI = &*I; - ++I; + E = mri_->reg_end(); I != E; ++I) + reglist.push_back(std::make_pair(&*I, I.getOperandNo())); + + for (unsigned N=0; N != reglist.size(); ++N) { + MachineInstr *UseMI = reglist[N].first; + MachineOperand &O = UseMI->getOperand(reglist[N].second); unsigned OldSubIdx = O.getSubReg(); if (DstIsPhys) { unsigned UseDstReg = DstReg; @@ -789,6 +801,19 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, O.setReg(UseDstReg); O.setSubReg(0); + if (OldSubIdx) { + // Def and kill of subregister of a virtual register actually defs and + // kills the whole register. Add imp-defs and imp-kills as needed. 
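[The UpdateRegDefsUses change just above is a classic iterator-invalidation fix: rewriting operands may add implicit operands to an instruction, which can invalidate the register use-list iterators being walked, so the list is snapshotted into a SmallVector first and the copy is indexed instead. The same defensive pattern in miniature:]

    #include <cstddef>
    #include <vector>

    // Iterating a container while the loop body may grow it
    // invalidates iterators; copy what you need up front and index
    // into the stable copy instead.
    void drainSafely(std::vector<int> &worklist) {
      std::vector<int> snapshot(worklist);       // snapshot first
      for (std::size_t n = 0; n != snapshot.size(); ++n)
        if (snapshot[n] % 2 == 0)
          worklist.push_back(snapshot[n] / 2);   // growth cannot bite us
    }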
+ if (O.isDef()) { + if(O.isDead()) + UseMI->addRegisterDead(DstReg, tri_, true); + else + UseMI->addRegisterDefined(DstReg, tri_); + } else if (!O.isUndef() && + (O.isKill() || + UseMI->isRegTiedToDefOperand(&O-&UseMI->getOperand(0)))) + UseMI->addRegisterKilled(DstReg, tri_, true); + } continue; } @@ -1029,8 +1054,9 @@ SimpleRegisterCoalescing::isWinToJoinVRWithSrcPhysReg(MachineInstr *CopyMI, unsigned Threshold = allocatableRCRegs_[RC].count() * 2; unsigned Length = li_->getApproximateInstructionCount(DstInt); if (Length > Threshold && - (((float)std::distance(mri_->use_begin(DstInt.reg), - mri_->use_end()) / Length) < (1.0 / Threshold))) + (((float)std::distance(mri_->use_nodbg_begin(DstInt.reg), + mri_->use_nodbg_end()) / Length) < + (1.0 / Threshold))) return false; // If the virtual register live interval extends into a loop, turn down @@ -1079,15 +1105,16 @@ SimpleRegisterCoalescing::isWinToJoinVRWithDstPhysReg(MachineInstr *CopyMI, MachineBasicBlock *CopyMBB, LiveInterval &DstInt, LiveInterval &SrcInt) { - // If the virtual register live interval is long but it has low use desity, + // If the virtual register live interval is long but it has low use density, // do not join them, instead mark the physical register as its allocation // preference. const TargetRegisterClass *RC = mri_->getRegClass(SrcInt.reg); unsigned Threshold = allocatableRCRegs_[RC].count() * 2; unsigned Length = li_->getApproximateInstructionCount(SrcInt); if (Length > Threshold && - (((float)std::distance(mri_->use_begin(SrcInt.reg), - mri_->use_end()) / Length) < (1.0 / Threshold))) + (((float)std::distance(mri_->use_nodbg_begin(SrcInt.reg), + mri_->use_nodbg_end()) / Length) < + (1.0 / Threshold))) return false; if (SrcInt.empty()) @@ -1139,12 +1166,14 @@ SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned LargeReg, LiveInterval &SmallInt = li_->getInterval(SmallReg); unsigned LargeSize = li_->getApproximateInstructionCount(LargeInt); unsigned SmallSize = li_->getApproximateInstructionCount(SmallInt); - if (SmallSize > Threshold || LargeSize > Threshold) - if ((float)std::distance(mri_->use_begin(SmallReg), - mri_->use_end()) / SmallSize < - (float)std::distance(mri_->use_begin(LargeReg), - mri_->use_end()) / LargeSize) + if (LargeSize > Threshold) { + unsigned SmallUses = std::distance(mri_->use_nodbg_begin(SmallReg), + mri_->use_nodbg_end()); + unsigned LargeUses = std::distance(mri_->use_nodbg_begin(LargeReg), + mri_->use_nodbg_end()); + if (SmallUses*LargeSize < LargeUses*SmallSize) return false; + } return true; } @@ -1164,13 +1193,15 @@ SimpleRegisterCoalescing::HasIncompatibleSubRegDefUse(MachineInstr *CopyMI, for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(VirtReg), E = mri_->reg_end(); I != E; ++I) { MachineOperand &O = I.getOperand(); + if (O.isDebug()) + continue; MachineInstr *MI = &*I; if (MI == CopyMI || JoinedCopies.count(MI)) continue; unsigned SubIdx = O.getSubReg(); if (SubIdx && !tri_->getSubReg(PhysReg, SubIdx)) return true; - if (MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) { + if (MI->isExtractSubreg()) { SubIdx = MI->getOperand(2).getImm(); if (O.isUse() && !tri_->getSubReg(PhysReg, SubIdx)) return true; @@ -1184,8 +1215,7 @@ SimpleRegisterCoalescing::HasIncompatibleSubRegDefUse(MachineInstr *CopyMI, return true; } } - if (MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG || - MI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) { + if (MI->isInsertSubreg() || MI->isSubregToReg()) { SubIdx = MI->getOperand(3).getImm(); if (VirtReg == MI->getOperand(0).getReg()) { if 
(!tri_->getSubReg(PhysReg, SubIdx)) @@ -1296,9 +1326,9 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { DEBUG(dbgs() << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI); unsigned SrcReg, DstReg, SrcSubIdx = 0, DstSubIdx = 0; - bool isExtSubReg = CopyMI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG; - bool isInsSubReg = CopyMI->getOpcode() == TargetInstrInfo::INSERT_SUBREG; - bool isSubRegToReg = CopyMI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG; + bool isExtSubReg = CopyMI->isExtractSubreg(); + bool isInsSubReg = CopyMI->isInsertSubreg(); + bool isSubRegToReg = CopyMI->isSubregToReg(); unsigned SubIdx = 0; if (isExtSubReg) { DstReg = CopyMI->getOperand(0).getReg(); @@ -1551,7 +1581,10 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { (isExtSubReg || DstRC->isASubClass()) && !isWinToJoinCrossClass(LargeReg, SmallReg, allocatableRCRegs_[NewRC].count())) { - DEBUG(dbgs() << "\tSrc/Dest are different register classes.\n"); + DEBUG(dbgs() << "\tSrc/Dest are different register classes: " + << SrcRC->getName() << "/" + << DstRC->getName() << " -> " + << NewRC->getName() << ".\n"); // Allow the coalescer to try again in case either side gets coalesced to // a physical register that's compatible with the other side. e.g. // r1024 = MOV32to32_ r1025 @@ -1631,8 +1664,8 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { unsigned Length = li_->getApproximateInstructionCount(JoinVInt); float Ratio = 1.0 / Threshold; if (Length > Threshold && - (((float)std::distance(mri_->use_begin(JoinVReg), - mri_->use_end()) / Length) < Ratio)) { + (((float)std::distance(mri_->use_nodbg_begin(JoinVReg), + mri_->use_nodbg_end()) / Length) < Ratio)) { mri_->setRegAllocationHint(JoinVInt.reg, 0, JoinPReg); ++numAborts; DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n"); @@ -1755,6 +1788,23 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { UpdateRegDefsUses(SrcReg, DstReg, SubIdx); + // If we have extended the live range of a physical register, make sure we + // update live-in lists as well. + if (TargetRegisterInfo::isPhysicalRegister(DstReg)) { + const LiveInterval &VRegInterval = li_->getInterval(SrcReg); + SmallVector<MachineBasicBlock*, 16> BlockSeq; + for (LiveInterval::const_iterator I = VRegInterval.begin(), + E = VRegInterval.end(); I != E; ++I ) { + li_->findLiveInMBBs(I->start, I->end, BlockSeq); + for (unsigned idx = 0, size = BlockSeq.size(); idx != size; ++idx) { + MachineBasicBlock &block = *BlockSeq[idx]; + if (!block.isLiveIn(DstReg)) + block.addLiveIn(DstReg); + } + BlockSeq.clear(); + } + } + // SrcReg is guarateed to be the register whose live interval that is // being merged. 
li_->removeInterval(SrcReg); @@ -1849,11 +1899,11 @@ static bool isValNoDefMove(const MachineInstr *MI, unsigned DR, unsigned SR, unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; if (TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) ; - else if (MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) { + else if (MI->isExtractSubreg()) { DstReg = MI->getOperand(0).getReg(); SrcReg = MI->getOperand(1).getReg(); - } else if (MI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG || - MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG) { + } else if (MI->isSubregToReg() || + MI->isInsertSubreg()) { DstReg = MI->getOperand(0).getReg(); SrcReg = MI->getOperand(2).getReg(); } else @@ -2425,16 +2475,15 @@ void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB, // If this isn't a copy nor a extract_subreg, we can't join intervals. unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; bool isInsUndef = false; - if (Inst->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) { + if (Inst->isExtractSubreg()) { DstReg = Inst->getOperand(0).getReg(); SrcReg = Inst->getOperand(1).getReg(); - } else if (Inst->getOpcode() == TargetInstrInfo::INSERT_SUBREG) { + } else if (Inst->isInsertSubreg()) { DstReg = Inst->getOperand(0).getReg(); SrcReg = Inst->getOperand(2).getReg(); if (Inst->getOperand(1).isUndef()) isInsUndef = true; - } else if (Inst->getOpcode() == TargetInstrInfo::INSERT_SUBREG || - Inst->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) { + } else if (Inst->isInsertSubreg() || Inst->isSubregToReg()) { DstReg = Inst->getOperand(0).getReg(); SrcReg = Inst->getOperand(2).getReg(); } else if (!tii_->isMoveInstr(*Inst, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) @@ -2549,8 +2598,8 @@ SimpleRegisterCoalescing::differingRegisterClasses(unsigned RegA, return !RegClassA->contains(RegB); } -/// lastRegisterUse - Returns the last use of the specific register between -/// cycles Start and End or NULL if there are no uses. +/// lastRegisterUse - Returns the last (non-debug) use of the specific register +/// between cycles Start and End or NULL if there are no uses. MachineOperand * SimpleRegisterCoalescing::lastRegisterUse(SlotIndex Start, SlotIndex End, @@ -2559,8 +2608,8 @@ SimpleRegisterCoalescing::lastRegisterUse(SlotIndex Start, UseIdx = SlotIndex(); if (TargetRegisterInfo::isVirtualRegister(Reg)) { MachineOperand *LastUse = NULL; - for (MachineRegisterInfo::use_iterator I = mri_->use_begin(Reg), - E = mri_->use_end(); I != E; ++I) { + for (MachineRegisterInfo::use_nodbg_iterator I = mri_->use_nodbg_begin(Reg), + E = mri_->use_nodbg_end(); I != E; ++I) { MachineOperand &Use = I.getOperand(); MachineInstr *UseMI = Use.getParent(); unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; @@ -2670,10 +2719,8 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { // Delete all coalesced copies. 
bool DoDelete = true; if (!tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) { - assert((MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG || - MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG || - MI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) && - "Unrecognized copy instruction"); + assert((MI->isExtractSubreg() || MI->isInsertSubreg() || + MI->isSubregToReg()) && "Unrecognized copy instruction"); DstReg = MI->getOperand(0).getReg(); if (TargetRegisterInfo::isPhysicalRegister(DstReg)) // Do not delete extract_subreg, insert_subreg of physical diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index 9558933..8d4d1b2 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -51,6 +51,7 @@ namespace { Value *PersonalityFn; Constant *SelectorFn; Constant *ExceptionFn; + Constant *CallSiteFn; Value *CallSite; public: @@ -116,6 +117,7 @@ bool SjLjEHPass::doInitialization(Module &M) { LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda); SelectorFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_selector); ExceptionFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_exception); + CallSiteFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_callsite); PersonalityFn = 0; return true; @@ -143,15 +145,14 @@ void SjLjEHPass::markInvokeCallSite(InvokeInst *II, unsigned InvokeNo, } } - // Insert a store of the invoke num before the invoke and store zero into the - // location afterward. + // Insert a store of the invoke num before the invoke new StoreInst(CallSiteNoC, CallSite, true, II); // volatile + CallInst::Create(CallSiteFn, CallSiteNoC, "", II); // Add a switch case to our unwind block. CatchSwitch->addCase(SwitchValC, II->getUnwindDest()); - // We still want this to look like an invoke so we emit the LSDA properly - // FIXME: ??? Or will this cause strangeness with mis-matched IDs like - // when it was in the front end? + // We still want this to look like an invoke so we emit the LSDA properly, + // so we don't transform the invoke into a call here. } /// MarkBlocksLiveIn - Insert BB and all of its predecessors into LiveBBs until diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp index a23efb2..6110ef5 100644 --- a/lib/CodeGen/SlotIndexes.cpp +++ b/lib/CodeGen/SlotIndexes.cpp @@ -95,7 +95,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { push_back(createEntry(0, index)); - // Iterate over the the function. + // Iterate over the function. for (MachineFunction::iterator mbbItr = mf->begin(), mbbEnd = mf->end(); mbbItr != mbbEnd; ++mbbItr) { MachineBasicBlock *mbb = &*mbbItr; @@ -107,8 +107,8 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { for (MachineBasicBlock::iterator miItr = mbb->begin(), miEnd = mbb->end(); miItr != miEnd; ++miItr) { - MachineInstr *mi = &*miItr; - if (mi->getOpcode()==TargetInstrInfo::DEBUG_VALUE) + MachineInstr *mi = miItr; + if (mi->isDebugValue()) continue; if (miItr == mbb->getFirstTerminator()) { diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index 48bb5af..8a6a727 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -113,7 +113,7 @@ bool StackProtector::RequiresStackProtector() const { if (const ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType())) { // We apparently only care about character arrays.
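
The MarkBlocksLiveIn helper whose doc comment appears above is a classic fixed-point worklist: insert a block and its transitive predecessors until the set stops growing. A generic sketch of that shape over a plain predecessor map, with int block ids standing in for MachineBasicBlock:

#include <map>
#include <set>
#include <vector>

// Insert Start and all of its transitive predecessors into Live,
// stopping once the set stops growing -- the same fixed-point idea
// as MarkBlocksLiveIn, over a plain predecessor map.
static void markLiveIn(int Start,
                       const std::map<int, std::vector<int>> &Preds,
                       std::set<int> &Live) {
  std::vector<int> Worklist(1, Start);
  while (!Worklist.empty()) {
    int BB = Worklist.back();
    Worklist.pop_back();
    if (!Live.insert(BB).second)
      continue; // already visited; nothing new to add from here
    std::map<int, std::vector<int>>::const_iterator It = Preds.find(BB);
    if (It != Preds.end())
      for (int P : It->second)
        Worklist.push_back(P);
  }
}
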
- if (!AT->getElementType()->isInteger(8)) + if (!AT->getElementType()->isIntegerTy(8)) continue; // If an array has more than SSPBufferSize bytes of allocated space, diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index 2170703..12d38f0 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -504,10 +504,8 @@ bool StackSlotColoring::PropagateBackward(MachineBasicBlock::iterator MII, // Abort if the use is actually a sub-register def. We don't have enough // information to figure out if it is really legal. - if (MO.getSubReg() || - TID.getOpcode() == TargetInstrInfo::EXTRACT_SUBREG || - TID.getOpcode() == TargetInstrInfo::INSERT_SUBREG || - TID.getOpcode() == TargetInstrInfo::SUBREG_TO_REG) + if (MO.getSubReg() || MII->isExtractSubreg() || + MII->isInsertSubreg() || MII->isSubregToReg()) return false; const TargetRegisterClass *RC = TID.OpInfo[i].getRegClass(TRI); @@ -569,8 +567,7 @@ bool StackSlotColoring::PropagateForward(MachineBasicBlock::iterator MII, // Abort if the use is actually a sub-register use. We don't have enough // information to figure out if it is really legal. - if (MO.getSubReg() || - TID.getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) + if (MO.getSubReg() || MII->isExtractSubreg()) return false; const TargetRegisterClass *RC = TID.OpInfo[i].getRegClass(TRI); diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp index bd7cb75..f8f6a55 100644 --- a/lib/CodeGen/StrongPHIElimination.cpp +++ b/lib/CodeGen/StrongPHIElimination.cpp @@ -49,7 +49,7 @@ namespace { std::map<unsigned, std::vector<unsigned> > Stacks; // Registers in UsedByAnother are PHI nodes that are themselves - // used as operands to another another PHI node + // used as operands to another PHI node std::set<unsigned> UsedByAnother; // RenameSets is a map from a PHI-defined register @@ -419,7 +419,7 @@ void StrongPHIElimination::processBlock(MachineBasicBlock* MBB) { // Iterate over all the PHI nodes in this block MachineBasicBlock::iterator P = MBB->begin(); - while (P != MBB->end() && P->getOpcode() == TargetInstrInfo::PHI) { + while (P != MBB->end() && P->isPHI()) { unsigned DestReg = P->getOperand(0).getReg(); // Don't bother doing PHI elimination for dead PHIs. @@ -452,7 +452,7 @@ void StrongPHIElimination::processBlock(MachineBasicBlock* MBB) { // We don't need to insert copies for implicit_defs.
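
The PHI-handling loops in these passes step through operands two at a time because a machine PHI stores its def followed by (value, predecessor-block) operand pairs. A toy illustration of that layout, with ints standing in for registers and blocks:

#include <cstdio>
#include <vector>

// A machine PHI's operand list is [def, val0, pred0, val1, pred1, ...],
// which is why the passes above iterate with i += 2 starting at 1.
static void dumpPhiIncoming(const std::vector<int> &Operands) {
  for (size_t i = 1; i + 1 < Operands.size(); i += 2)
    std::printf("value %d from block %d\n", Operands[i], Operands[i + 1]);
}
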
MachineInstr* DefMI = MRI.getVRegDef(SrcReg); - if (DefMI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) + if (DefMI->isImplicitDef()) ProcessedNames.insert(SrcReg); // Check for trivial interferences via liveness information, allowing us @@ -470,7 +470,7 @@ void StrongPHIElimination::processBlock(MachineBasicBlock* MBB) { if (isLiveIn(SrcReg, P->getParent(), LI) || isLiveOut(P->getOperand(0).getReg(), MRI.getVRegDef(SrcReg)->getParent(), LI) || - ( MRI.getVRegDef(SrcReg)->getOpcode() == TargetInstrInfo::PHI && + ( MRI.getVRegDef(SrcReg)->isPHI() && isLiveIn(P->getOperand(0).getReg(), MRI.getVRegDef(SrcReg)->getParent(), LI) ) || ProcessedNames.count(SrcReg) || @@ -810,7 +810,7 @@ void StrongPHIElimination::InsertCopies(MachineDomTreeNode* MDTN, // Rewrite register uses from Stacks for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ++I) { - if (I->getOpcode() == TargetInstrInfo::PHI) + if (I->isPHI()) continue; for (unsigned i = 0; i < I->getNumOperands(); ++i) @@ -907,8 +907,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) { // Determine which phi node operands need copies for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) - if (!I->empty() && - I->begin()->getOpcode() == TargetInstrInfo::PHI) + if (!I->empty() && I->begin()->isPHI()) processBlock(I); // Break interferences where two different phis want to coalesce @@ -996,7 +995,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) { for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { for (MachineBasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE; ++BI) - if (BI->getOpcode() == TargetInstrInfo::PHI) + if (BI->isPHI()) phis.push_back(BI); } diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp index d6860bc..3223e53 100644 --- a/lib/CodeGen/TailDuplication.cpp +++ b/lib/CodeGen/TailDuplication.cpp @@ -121,7 +121,7 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { MBB->pred_end()); MachineBasicBlock::iterator MI = MBB->begin(); while (MI != MBB->end()) { - if (MI->getOpcode() != TargetInstrInfo::PHI) + if (!MI->isPHI()) break; for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), PE = Preds.end(); PI != PE; ++PI) { @@ -378,7 +378,7 @@ TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, MachineBasicBlock *SuccBB = *SI; for (MachineBasicBlock::iterator II = SuccBB->begin(), EE = SuccBB->end(); II != EE; ++II) { - if (II->getOpcode() != TargetInstrInfo::PHI) + if (!II->isPHI()) break; unsigned Idx = 0; for (unsigned i = 1, e = II->getNumOperands(); i != e; i += 2) { @@ -403,26 +403,45 @@ TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, II->RemoveOperand(i); } } - II->RemoveOperand(Idx+1); - II->RemoveOperand(Idx); - } + } else + Idx = 0; + + // If Idx is set, the operands at Idx and Idx+1 must be removed. + // We reuse the location to avoid expensive RemoveOperand calls. + DenseMap<unsigned,AvailableValsTy>::iterator LI=SSAUpdateVals.find(Reg); if (LI != SSAUpdateVals.end()) { // This register is defined in the tail block. 
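
The UpdateSuccessorsPHIs rewrite just below overwrites an existing operand pair in place via setReg()/setMBB() instead of calling RemoveOperand and re-adding, since removing an operand shifts every later operand down. The trade-off in miniature, on a plain vector:

#include <vector>

struct Operand { int Reg; int Block; };

// Expensive form: erase shifts all trailing elements, and push_back may
// reallocate -- analogous to RemoveOperand followed by addOperand.
static void replaceSlow(std::vector<Operand> &Ops, size_t Idx, Operand New) {
  Ops.erase(Ops.begin() + Idx);
  Ops.push_back(New);
}

// Cheap form: reuse the slot, as the patch does with setReg()/setMBB().
static void replaceFast(std::vector<Operand> &Ops, size_t Idx, Operand New) {
  Ops[Idx] = New;
}
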
for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { MachineBasicBlock *SrcBB = LI->second[j].first; unsigned SrcReg = LI->second[j].second; - II->addOperand(MachineOperand::CreateReg(SrcReg, false)); - II->addOperand(MachineOperand::CreateMBB(SrcBB)); + if (Idx != 0) { + II->getOperand(Idx).setReg(SrcReg); + II->getOperand(Idx+1).setMBB(SrcBB); + Idx = 0; + } else { + II->addOperand(MachineOperand::CreateReg(SrcReg, false)); + II->addOperand(MachineOperand::CreateMBB(SrcBB)); + } } } else { // Live in tail block, must also be live in predecessors. for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) { MachineBasicBlock *SrcBB = TDBBs[j]; - II->addOperand(MachineOperand::CreateReg(Reg, false)); - II->addOperand(MachineOperand::CreateMBB(SrcBB)); + if (Idx != 0) { + II->getOperand(Idx).setReg(Reg); + II->getOperand(Idx+1).setMBB(SrcBB); + Idx = 0; + } else { + II->addOperand(MachineOperand::CreateReg(Reg, false)); + II->addOperand(MachineOperand::CreateMBB(SrcBB)); + } } } + if (Idx != 0) { + II->RemoveOperand(Idx+1); + II->RemoveOperand(Idx); + } } } } @@ -476,7 +495,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, if (InstrCount == MaxDuplicateCount) return false; // Remember if we saw a call. if (I->getDesc().isCall()) HasCall = true; - if (I->getOpcode() != TargetInstrInfo::PHI) + if (!I->isPHI()) InstrCount += 1; } // Heuristically, don't tail-duplicate calls if it would expand code size, @@ -528,7 +547,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, while (I != TailBB->end()) { MachineInstr *MI = &*I; ++I; - if (MI->getOpcode() == TargetInstrInfo::PHI) { + if (MI->isPHI()) { // Replace the uses of the def of the PHI with the register coming // from PredBB. ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos); @@ -580,7 +599,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; MachineBasicBlock::iterator I = TailBB->begin(); // Process PHI instructions first. - while (I != TailBB->end() && I->getOpcode() == TargetInstrInfo::PHI) { + while (I != TailBB->end() && I->isPHI()) { // Replace the uses of the def of the PHI with the register coming // from PredBB. MachineInstr *MI = &*I++; diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp new file mode 100644 index 0000000..190b533 --- /dev/null +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -0,0 +1,874 @@ +//===-- llvm/CodeGen/TargetLoweringObjectFileImpl.cpp - Object File Info --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements classes used to handle lowerings specific to common +// object file formats. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// ELF +//===----------------------------------------------------------------------===// +typedef StringMap<const MCSectionELF*> ELFUniqueMapTy; + +TargetLoweringObjectFileELF::~TargetLoweringObjectFileELF() { + // If we have the section uniquing map, free it. + delete (ELFUniqueMapTy*)UniquingMap; +} + +const MCSection *TargetLoweringObjectFileELF:: +getELFSection(StringRef Section, unsigned Type, unsigned Flags, + SectionKind Kind, bool IsExplicit) const { + if (UniquingMap == 0) + UniquingMap = new ELFUniqueMapTy(); + ELFUniqueMapTy &Map = *(ELFUniqueMapTy*)UniquingMap; + + // Do the lookup, if we have a hit, return it. + const MCSectionELF *&Entry = Map[Section]; + if (Entry) return Entry; + + return Entry = MCSectionELF::Create(Section, Type, Flags, Kind, IsExplicit, + getContext()); +} + +void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, + const TargetMachine &TM) { + if (UniquingMap != 0) + ((ELFUniqueMapTy*)UniquingMap)->clear(); + TargetLoweringObjectFile::Initialize(Ctx, TM); + + BSSSection = + getELFSection(".bss", MCSectionELF::SHT_NOBITS, + MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, + SectionKind::getBSS()); + + TextSection = + getELFSection(".text", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_EXECINSTR | MCSectionELF::SHF_ALLOC, + SectionKind::getText()); + + DataSection = + getELFSection(".data", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, + SectionKind::getDataRel()); + + ReadOnlySection = + getELFSection(".rodata", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC, + SectionKind::getReadOnly()); + + TLSDataSection = + getELFSection(".tdata", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS | + MCSectionELF::SHF_WRITE, SectionKind::getThreadData()); + + TLSBSSSection = + getELFSection(".tbss", MCSectionELF::SHT_NOBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS | + MCSectionELF::SHF_WRITE, SectionKind::getThreadBSS()); + + DataRelSection = + getELFSection(".data.rel", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, + SectionKind::getDataRel()); + + DataRelLocalSection = + getELFSection(".data.rel.local", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, + SectionKind::getDataRelLocal()); + + DataRelROSection = + getELFSection(".data.rel.ro", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, + SectionKind::getReadOnlyWithRel()); + + DataRelROLocalSection = + getELFSection(".data.rel.ro.local", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, + 
SectionKind::getReadOnlyWithRelLocal()); + + MergeableConst4Section = + getELFSection(".rodata.cst4", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE, + SectionKind::getMergeableConst4()); + + MergeableConst8Section = + getELFSection(".rodata.cst8", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE, + SectionKind::getMergeableConst8()); + + MergeableConst16Section = + getELFSection(".rodata.cst16", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE, + SectionKind::getMergeableConst16()); + + StaticCtorSection = + getELFSection(".ctors", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, + SectionKind::getDataRel()); + + StaticDtorSection = + getELFSection(".dtors", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, + SectionKind::getDataRel()); + + // Exception Handling Sections. + + // FIXME: We're emitting LSDA info into a readonly section on ELF, even though + // it contains relocatable pointers. In PIC mode, this is probably a big + // runtime hit for C++ apps. Either the contents of the LSDA need to be + // adjusted or this should be a data section. + LSDASection = + getELFSection(".gcc_except_table", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC, SectionKind::getReadOnly()); + EHFrameSection = + getELFSection(".eh_frame", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, + SectionKind::getDataRel()); + + // Debug Info Sections. + DwarfAbbrevSection = + getELFSection(".debug_abbrev", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfInfoSection = + getELFSection(".debug_info", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfLineSection = + getELFSection(".debug_line", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfFrameSection = + getELFSection(".debug_frame", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfPubNamesSection = + getELFSection(".debug_pubnames", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfPubTypesSection = + getELFSection(".debug_pubtypes", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfStrSection = + getELFSection(".debug_str", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfLocSection = + getELFSection(".debug_loc", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfARangesSection = + getELFSection(".debug_aranges", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfRangesSection = + getELFSection(".debug_ranges", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfMacroInfoSection = + getELFSection(".debug_macinfo", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); +} + + +static SectionKind +getELFKindForNamedSection(StringRef Name, SectionKind K) { + if (Name.empty() || Name[0] != '.') return K; + + // Some lame default implementation based on some magic section names. 
+ if (Name == ".bss" || + Name.startswith(".bss.") || + Name.startswith(".gnu.linkonce.b.") || + Name.startswith(".llvm.linkonce.b.") || + Name == ".sbss" || + Name.startswith(".sbss.") || + Name.startswith(".gnu.linkonce.sb.") || + Name.startswith(".llvm.linkonce.sb.")) + return SectionKind::getBSS(); + + if (Name == ".tdata" || + Name.startswith(".tdata.") || + Name.startswith(".gnu.linkonce.td.") || + Name.startswith(".llvm.linkonce.td.")) + return SectionKind::getThreadData(); + + if (Name == ".tbss" || + Name.startswith(".tbss.") || + Name.startswith(".gnu.linkonce.tb.") || + Name.startswith(".llvm.linkonce.tb.")) + return SectionKind::getThreadBSS(); + + return K; +} + + +static unsigned getELFSectionType(StringRef Name, SectionKind K) { + + if (Name == ".init_array") + return MCSectionELF::SHT_INIT_ARRAY; + + if (Name == ".fini_array") + return MCSectionELF::SHT_FINI_ARRAY; + + if (Name == ".preinit_array") + return MCSectionELF::SHT_PREINIT_ARRAY; + + if (K.isBSS() || K.isThreadBSS()) + return MCSectionELF::SHT_NOBITS; + + return MCSectionELF::SHT_PROGBITS; +} + + +static unsigned +getELFSectionFlags(SectionKind K) { + unsigned Flags = 0; + + if (!K.isMetadata()) + Flags |= MCSectionELF::SHF_ALLOC; + + if (K.isText()) + Flags |= MCSectionELF::SHF_EXECINSTR; + + if (K.isWriteable()) + Flags |= MCSectionELF::SHF_WRITE; + + if (K.isThreadLocal()) + Flags |= MCSectionELF::SHF_TLS; + + // K.isMergeableConst() is left out to honour PR4650 + if (K.isMergeableCString() || K.isMergeableConst4() || + K.isMergeableConst8() || K.isMergeableConst16()) + Flags |= MCSectionELF::SHF_MERGE; + + if (K.isMergeableCString()) + Flags |= MCSectionELF::SHF_STRINGS; + + return Flags; +} + + +const MCSection *TargetLoweringObjectFileELF:: +getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + StringRef SectionName = GV->getSection(); + + // Infer section flags from the section name if we can. + Kind = getELFKindForNamedSection(SectionName, Kind); + + return getELFSection(SectionName, + getELFSectionType(SectionName, Kind), + getELFSectionFlags(Kind), Kind, true); +} + +static const char *getSectionPrefixForUniqueGlobal(SectionKind Kind) { + if (Kind.isText()) return ".gnu.linkonce.t."; + if (Kind.isReadOnly()) return ".gnu.linkonce.r."; + + if (Kind.isThreadData()) return ".gnu.linkonce.td."; + if (Kind.isThreadBSS()) return ".gnu.linkonce.tb."; + + if (Kind.isDataNoRel()) return ".gnu.linkonce.d."; + if (Kind.isDataRelLocal()) return ".gnu.linkonce.d.rel.local."; + if (Kind.isDataRel()) return ".gnu.linkonce.d.rel."; + if (Kind.isReadOnlyWithRelLocal()) return ".gnu.linkonce.d.rel.ro.local."; + + assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); + return ".gnu.linkonce.d.rel.ro."; +} + +const MCSection *TargetLoweringObjectFileELF:: +SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + + // If this global is linkonce/weak and the target handles this by emitting it + // into a 'uniqued' section name, create and return the section now. 
+ if (GV->isWeakForLinker() && !Kind.isCommon() && !Kind.isBSS()) { + const char *Prefix = getSectionPrefixForUniqueGlobal(Kind); + SmallString<128> Name; + Name.append(Prefix, Prefix+strlen(Prefix)); + Mang->getNameWithPrefix(Name, GV, false); + return getELFSection(Name.str(), getELFSectionType(Name.str(), Kind), + getELFSectionFlags(Kind), Kind); + } + + if (Kind.isText()) return TextSection; + + if (Kind.isMergeable1ByteCString() || + Kind.isMergeable2ByteCString() || + Kind.isMergeable4ByteCString()) { + + // We also need alignment here. + // FIXME: this is getting the alignment of the character, not the + // alignment of the global! + unsigned Align = + TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV)); + + const char *SizeSpec = ".rodata.str1."; + if (Kind.isMergeable2ByteCString()) + SizeSpec = ".rodata.str2."; + else if (Kind.isMergeable4ByteCString()) + SizeSpec = ".rodata.str4."; + else + assert(Kind.isMergeable1ByteCString() && "unknown string width"); + + + std::string Name = SizeSpec + utostr(Align); + return getELFSection(Name, MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | + MCSectionELF::SHF_MERGE | + MCSectionELF::SHF_STRINGS, + Kind); + } + + if (Kind.isMergeableConst()) { + if (Kind.isMergeableConst4() && MergeableConst4Section) + return MergeableConst4Section; + if (Kind.isMergeableConst8() && MergeableConst8Section) + return MergeableConst8Section; + if (Kind.isMergeableConst16() && MergeableConst16Section) + return MergeableConst16Section; + return ReadOnlySection; // .const + } + + if (Kind.isReadOnly()) return ReadOnlySection; + + if (Kind.isThreadData()) return TLSDataSection; + if (Kind.isThreadBSS()) return TLSBSSSection; + + // Note: we claim that common symbols are put in BSSSection, but they are + // really emitted with the magic .comm directive, which creates a symbol table + // entry but not a section. + if (Kind.isBSS() || Kind.isCommon()) return BSSSection; + + if (Kind.isDataNoRel()) return DataSection; + if (Kind.isDataRelLocal()) return DataRelLocalSection; + if (Kind.isDataRel()) return DataRelSection; + if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection; + + assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); + return DataRelROSection; +} + +/// getSectionForConstant - Given a mergeable constant with the +/// specified size and relocation information, return a section that it +/// should be placed in. +const MCSection *TargetLoweringObjectFileELF:: +getSectionForConstant(SectionKind Kind) const { + if (Kind.isMergeableConst4() && MergeableConst4Section) + return MergeableConst4Section; + if (Kind.isMergeableConst8() && MergeableConst8Section) + return MergeableConst8Section; + if (Kind.isMergeableConst16() && MergeableConst16Section) + return MergeableConst16Section; + if (Kind.isReadOnly()) + return ReadOnlySection; + + if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection; + assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); + return DataRelROSection; +} + +const MCExpr *TargetLoweringObjectFileELF:: +getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, + MachineModuleInfo *MMI, unsigned Encoding) const { + + if (Encoding & dwarf::DW_EH_PE_indirect) { + MachineModuleInfoELF &ELFMMI = MMI->getObjFileInfo<MachineModuleInfoELF>(); + + SmallString<128> Name; + Mang->getNameWithPrefix(Name, GV, true); + Name += ".DW.stub"; + + // Add information about the stub reference to ELFMMI so that the stub + // gets emitted by the asmprinter. 
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name.str()); + MCSymbol *&StubSym = ELFMMI.getGVStubEntry(Sym); + if (StubSym == 0) { + Name.clear(); + Mang->getNameWithPrefix(Name, GV, false); + StubSym = getContext().GetOrCreateSymbol(Name.str()); + } + + return TargetLoweringObjectFile:: + getSymbolForDwarfReference(Sym, MMI, + Encoding & ~dwarf::DW_EH_PE_indirect); + } + + return TargetLoweringObjectFile:: + getSymbolForDwarfGlobalReference(GV, Mang, MMI, Encoding); +} + +//===----------------------------------------------------------------------===// +// MachO +//===----------------------------------------------------------------------===// + +typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy; + +TargetLoweringObjectFileMachO::~TargetLoweringObjectFileMachO() { + // If we have the MachO uniquing map, free it. + delete (MachOUniqueMapTy*)UniquingMap; +} + + +const MCSectionMachO *TargetLoweringObjectFileMachO:: +getMachOSection(StringRef Segment, StringRef Section, + unsigned TypeAndAttributes, + unsigned Reserved2, SectionKind Kind) const { + // We unique sections by their segment/section pair. The returned section + // may not have the same flags as the requested section, if so this should be + // diagnosed by the client as an error. + + // Create the map if it doesn't already exist. + if (UniquingMap == 0) + UniquingMap = new MachOUniqueMapTy(); + MachOUniqueMapTy &Map = *(MachOUniqueMapTy*)UniquingMap; + + // Form the name to look up. + SmallString<64> Name; + Name += Segment; + Name.push_back(','); + Name += Section; + + // Do the lookup, if we have a hit, return it. + const MCSectionMachO *&Entry = Map[Name.str()]; + if (Entry) return Entry; + + // Otherwise, return a new section. + return Entry = MCSectionMachO::Create(Segment, Section, TypeAndAttributes, + Reserved2, Kind, getContext()); +} + + +void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, + const TargetMachine &TM) { + if (UniquingMap != 0) + ((MachOUniqueMapTy*)UniquingMap)->clear(); + TargetLoweringObjectFile::Initialize(Ctx, TM); + + TextSection // .text + = getMachOSection("__TEXT", "__text", + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + SectionKind::getText()); + DataSection // .data + = getMachOSection("__DATA", "__data", 0, SectionKind::getDataRel()); + + CStringSection // .cstring + = getMachOSection("__TEXT", "__cstring", MCSectionMachO::S_CSTRING_LITERALS, + SectionKind::getMergeable1ByteCString()); + UStringSection + = getMachOSection("__TEXT","__ustring", 0, + SectionKind::getMergeable2ByteCString()); + FourByteConstantSection // .literal4 + = getMachOSection("__TEXT", "__literal4", MCSectionMachO::S_4BYTE_LITERALS, + SectionKind::getMergeableConst4()); + EightByteConstantSection // .literal8 + = getMachOSection("__TEXT", "__literal8", MCSectionMachO::S_8BYTE_LITERALS, + SectionKind::getMergeableConst8()); + + // ld_classic doesn't support .literal16 in 32-bit mode, and ld64 falls back + // to using it in -static mode. 
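
Each object-file lowering above lazily uniques sections through a string-keyed map, and Mach-O keys on the combined "segment,section" name. A standard-library sketch of the same get-or-create cache, with simplified stand-in types:

#include <memory>
#include <string>
#include <unordered_map>

struct Section { std::string Segment, Name; };

class SectionCache {
  // Keyed on "segment,section", like getMachOSection's lookup name.
  std::unordered_map<std::string, std::unique_ptr<Section>> Map;
public:
  const Section *get(const std::string &Segment, const std::string &Name) {
    std::string Key = Segment + ',' + Name;
    std::unique_ptr<Section> &Entry = Map[Key]; // null on first use
    if (!Entry)
      Entry.reset(new Section{Segment, Name});
    return Entry.get();
  }
};

Returning the cached pointer on a hit is what lets the client compare section identity by pointer, and it is also why mismatched flags for an existing name must be diagnosed rather than silently creating a second section.
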
+ SixteenByteConstantSection = 0; + if (TM.getRelocationModel() != Reloc::Static && + TM.getTargetData()->getPointerSize() == 32) + SixteenByteConstantSection = // .literal16 + getMachOSection("__TEXT", "__literal16",MCSectionMachO::S_16BYTE_LITERALS, + SectionKind::getMergeableConst16()); + + ReadOnlySection // .const + = getMachOSection("__TEXT", "__const", 0, SectionKind::getReadOnly()); + + TextCoalSection + = getMachOSection("__TEXT", "__textcoal_nt", + MCSectionMachO::S_COALESCED | + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + SectionKind::getText()); + ConstTextCoalSection + = getMachOSection("__TEXT", "__const_coal", MCSectionMachO::S_COALESCED, + SectionKind::getText()); + ConstDataCoalSection + = getMachOSection("__DATA","__const_coal", MCSectionMachO::S_COALESCED, + SectionKind::getText()); + ConstDataSection // .const_data + = getMachOSection("__DATA", "__const", 0, + SectionKind::getReadOnlyWithRel()); + DataCoalSection + = getMachOSection("__DATA","__datacoal_nt", MCSectionMachO::S_COALESCED, + SectionKind::getDataRel()); + DataCommonSection + = getMachOSection("__DATA","__common", MCSectionMachO::S_ZEROFILL, + SectionKind::getBSS()); + DataBSSSection + = getMachOSection("__DATA","__bss", MCSectionMachO::S_ZEROFILL, + SectionKind::getBSS()); + + + LazySymbolPointerSection + = getMachOSection("__DATA", "__la_symbol_ptr", + MCSectionMachO::S_LAZY_SYMBOL_POINTERS, + SectionKind::getMetadata()); + NonLazySymbolPointerSection + = getMachOSection("__DATA", "__nl_symbol_ptr", + MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS, + SectionKind::getMetadata()); + + if (TM.getRelocationModel() == Reloc::Static) { + StaticCtorSection + = getMachOSection("__TEXT", "__constructor", 0,SectionKind::getDataRel()); + StaticDtorSection + = getMachOSection("__TEXT", "__destructor", 0, SectionKind::getDataRel()); + } else { + StaticCtorSection + = getMachOSection("__DATA", "__mod_init_func", + MCSectionMachO::S_MOD_INIT_FUNC_POINTERS, + SectionKind::getDataRel()); + StaticDtorSection + = getMachOSection("__DATA", "__mod_term_func", + MCSectionMachO::S_MOD_TERM_FUNC_POINTERS, + SectionKind::getDataRel()); + } + + // Exception Handling. + LSDASection = getMachOSection("__DATA", "__gcc_except_tab", 0, + SectionKind::getDataRel()); + EHFrameSection = + getMachOSection("__TEXT", "__eh_frame", + MCSectionMachO::S_COALESCED | + MCSectionMachO::S_ATTR_NO_TOC | + MCSectionMachO::S_ATTR_STRIP_STATIC_SYMS | + MCSectionMachO::S_ATTR_LIVE_SUPPORT, + SectionKind::getReadOnly()); + + // Debug Information. 
+ DwarfAbbrevSection = + getMachOSection("__DWARF", "__debug_abbrev", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfInfoSection = + getMachOSection("__DWARF", "__debug_info", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfLineSection = + getMachOSection("__DWARF", "__debug_line", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfFrameSection = + getMachOSection("__DWARF", "__debug_frame", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfPubNamesSection = + getMachOSection("__DWARF", "__debug_pubnames", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfPubTypesSection = + getMachOSection("__DWARF", "__debug_pubtypes", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfStrSection = + getMachOSection("__DWARF", "__debug_str", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfLocSection = + getMachOSection("__DWARF", "__debug_loc", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfARangesSection = + getMachOSection("__DWARF", "__debug_aranges", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfRangesSection = + getMachOSection("__DWARF", "__debug_ranges", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfMacroInfoSection = + getMachOSection("__DWARF", "__debug_macinfo", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfDebugInlineSection = + getMachOSection("__DWARF", "__debug_inlined", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); +} + +const MCSection *TargetLoweringObjectFileMachO:: +getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + // Parse the section specifier and create it if valid. + StringRef Segment, Section; + unsigned TAA, StubSize; + std::string ErrorCode = + MCSectionMachO::ParseSectionSpecifier(GV->getSection(), Segment, Section, + TAA, StubSize); + if (!ErrorCode.empty()) { + // If invalid, report the error with llvm_report_error. + llvm_report_error("Global variable '" + GV->getNameStr() + + "' has an invalid section specifier '" + GV->getSection()+ + "': " + ErrorCode + "."); + // Fall back to dropping it into the data section. + return DataSection; + } + + // Get the section. + const MCSectionMachO *S = + getMachOSection(Segment, Section, TAA, StubSize, Kind); + + // Okay, now that we got the section, verify that the TAA & StubSize agree. + // If the user declared multiple globals with different section flags, we need + // to reject it here. + if (S->getTypeAndAttributes() != TAA || S->getStubSize() != StubSize) { + // If invalid, report the error with llvm_report_error. + llvm_report_error("Global variable '" + GV->getNameStr() + + "' section type or attributes does not match previous" + " section specifier"); + } + + return S; +} + +const MCSection *TargetLoweringObjectFileMachO:: +SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + assert(!Kind.isThreadLocal() && "Darwin doesn't support TLS"); + + if (Kind.isText()) + return GV->isWeakForLinker() ? TextCoalSection : TextSection; + + // If this is weak/linkonce, put this in a coalescable section, either in text + // or data depending on if it is writable. + if (GV->isWeakForLinker()) { + if (Kind.isReadOnly()) + return ConstTextCoalSection; + return DataCoalSection; + } + + // FIXME: Alignment check should be handled by section classifier. 
+ if (Kind.isMergeable1ByteCString() || + Kind.isMergeable2ByteCString()) { + if (TM.getTargetData()->getPreferredAlignment( + cast<GlobalVariable>(GV)) < 32) { + if (Kind.isMergeable1ByteCString()) + return CStringSection; + assert(Kind.isMergeable2ByteCString()); + return UStringSection; + } + } + + if (Kind.isMergeableConst()) { + if (Kind.isMergeableConst4()) + return FourByteConstantSection; + if (Kind.isMergeableConst8()) + return EightByteConstantSection; + if (Kind.isMergeableConst16() && SixteenByteConstantSection) + return SixteenByteConstantSection; + } + + // Otherwise, if it is readonly, but not something we can specially optimize, + // just drop it in .const. + if (Kind.isReadOnly()) + return ReadOnlySection; + + // If this is marked const, put it into a const section. But if the dynamic + // linker needs to write to it, put it in the data segment. + if (Kind.isReadOnlyWithRel()) + return ConstDataSection; + + // Put zero initialized globals with strong external linkage in the + // DATA, __common section with the .zerofill directive. + if (Kind.isBSSExtern()) + return DataCommonSection; + + // Put zero initialized globals with local linkage in __DATA,__bss directive + // with the .zerofill directive (aka .lcomm). + if (Kind.isBSSLocal()) + return DataBSSSection; + + // Otherwise, just drop the variable in the normal data section. + return DataSection; +} + +const MCSection * +TargetLoweringObjectFileMachO::getSectionForConstant(SectionKind Kind) const { + // If this constant requires a relocation, we have to put it in the data + // segment, not in the text segment. + if (Kind.isDataRel() || Kind.isReadOnlyWithRel()) + return ConstDataSection; + + if (Kind.isMergeableConst4()) + return FourByteConstantSection; + if (Kind.isMergeableConst8()) + return EightByteConstantSection; + if (Kind.isMergeableConst16() && SixteenByteConstantSection) + return SixteenByteConstantSection; + return ReadOnlySection; // .const +} + +/// shouldEmitUsedDirectiveFor - This hook allows targets to selectively decide +/// not to emit the UsedDirective for some symbols in llvm.used. +// FIXME: REMOVE this (rdar://7071300) +bool TargetLoweringObjectFileMachO:: +shouldEmitUsedDirectiveFor(const GlobalValue *GV, Mangler *Mang) const { + /// On Darwin, internally linked data beginning with "L" or "l" does not have + /// the directive emitted (this occurs in ObjC metadata). + if (!GV) return false; + + // Check whether the mangled name has the "Private" or "LinkerPrivate" prefix. + if (GV->hasLocalLinkage() && !isa<Function>(GV)) { + // FIXME: ObjC metadata is currently emitted as internal symbols that have + // \1L and \0l prefixes on them. Fix them to be Private/LinkerPrivate and + // this horrible hack can go away. + SmallString<64> Name; + Mang->getNameWithPrefix(Name, GV, false); + if (Name[0] == 'L' || Name[0] == 'l') + return false; + } + + return true; +} + +const MCExpr *TargetLoweringObjectFileMachO:: +getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, + MachineModuleInfo *MMI, unsigned Encoding) const { + // The mach-o version of this method defaults to returning a stub reference. 
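
shouldEmitUsedDirectiveFor above suppresses the used directive for Darwin's assembler-local names, which are recognized purely by the first character of the mangled name. The prefix test in isolation, assuming the mangled name is already available as a string:

#include <string>

// On Darwin, symbols whose mangled names begin with 'L' or 'l' are
// assembler-local labels (common for ObjC metadata), so emitting a
// directive that references them would be meaningless.
static bool isAssemblerLocalName(const std::string &Mangled) {
  return !Mangled.empty() && (Mangled[0] == 'L' || Mangled[0] == 'l');
}
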
+ + if (Encoding & dwarf::DW_EH_PE_indirect) { + SmallString<128> Name; + Mang->getNameWithPrefix(Name, GV, true); + Name += "$non_lazy_ptr"; + MCSymbol *Sym = getContext().GetOrCreateSymbol(Name.str()); + + return TargetLoweringObjectFile:: + getSymbolForDwarfReference(Sym, MMI, + Encoding & ~dwarf::DW_EH_PE_indirect); + } + + return TargetLoweringObjectFile:: + getSymbolForDwarfGlobalReference(GV, Mang, MMI, Encoding); +} + + +//===----------------------------------------------------------------------===// +// COFF +//===----------------------------------------------------------------------===// + +typedef StringMap<const MCSectionCOFF*> COFFUniqueMapTy; + +TargetLoweringObjectFileCOFF::~TargetLoweringObjectFileCOFF() { + delete (COFFUniqueMapTy*)UniquingMap; +} + + +const MCSection *TargetLoweringObjectFileCOFF:: +getCOFFSection(StringRef Name, bool isDirective, SectionKind Kind) const { + // Create the map if it doesn't already exist. + if (UniquingMap == 0) + UniquingMap = new MachOUniqueMapTy(); + COFFUniqueMapTy &Map = *(COFFUniqueMapTy*)UniquingMap; + + // Do the lookup, if we have a hit, return it. + const MCSectionCOFF *&Entry = Map[Name]; + if (Entry) return Entry; + + return Entry = MCSectionCOFF::Create(Name, isDirective, Kind, getContext()); +} + +void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx, + const TargetMachine &TM) { + if (UniquingMap != 0) + ((COFFUniqueMapTy*)UniquingMap)->clear(); + TargetLoweringObjectFile::Initialize(Ctx, TM); + TextSection = getCOFFSection("\t.text", true, SectionKind::getText()); + DataSection = getCOFFSection("\t.data", true, SectionKind::getDataRel()); + StaticCtorSection = + getCOFFSection(".ctors", false, SectionKind::getDataRel()); + StaticDtorSection = + getCOFFSection(".dtors", false, SectionKind::getDataRel()); + + // FIXME: We're emitting LSDA info into a readonly section on COFF, even + // though it contains relocatable pointers. In PIC mode, this is probably a + // big runtime hit for C++ apps. Either the contents of the LSDA need to be + // adjusted or this should be a data section. + LSDASection = + getCOFFSection(".gcc_except_table", false, SectionKind::getReadOnly()); + EHFrameSection = + getCOFFSection(".eh_frame", false, SectionKind::getDataRel()); + + // Debug info. + // FIXME: Don't use 'directive' mode here. 
+ DwarfAbbrevSection = + getCOFFSection("\t.section\t.debug_abbrev,\"dr\"", + true, SectionKind::getMetadata()); + DwarfInfoSection = + getCOFFSection("\t.section\t.debug_info,\"dr\"", + true, SectionKind::getMetadata()); + DwarfLineSection = + getCOFFSection("\t.section\t.debug_line,\"dr\"", + true, SectionKind::getMetadata()); + DwarfFrameSection = + getCOFFSection("\t.section\t.debug_frame,\"dr\"", + true, SectionKind::getMetadata()); + DwarfPubNamesSection = + getCOFFSection("\t.section\t.debug_pubnames,\"dr\"", + true, SectionKind::getMetadata()); + DwarfPubTypesSection = + getCOFFSection("\t.section\t.debug_pubtypes,\"dr\"", + true, SectionKind::getMetadata()); + DwarfStrSection = + getCOFFSection("\t.section\t.debug_str,\"dr\"", + true, SectionKind::getMetadata()); + DwarfLocSection = + getCOFFSection("\t.section\t.debug_loc,\"dr\"", + true, SectionKind::getMetadata()); + DwarfARangesSection = + getCOFFSection("\t.section\t.debug_aranges,\"dr\"", + true, SectionKind::getMetadata()); + DwarfRangesSection = + getCOFFSection("\t.section\t.debug_ranges,\"dr\"", + true, SectionKind::getMetadata()); + DwarfMacroInfoSection = + getCOFFSection("\t.section\t.debug_macinfo,\"dr\"", + true, SectionKind::getMetadata()); +} + +const MCSection *TargetLoweringObjectFileCOFF:: +getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + return getCOFFSection(GV->getSection(), false, Kind); +} + +static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) { + if (Kind.isText()) + return ".text$linkonce"; + if (Kind.isWriteable()) + return ".data$linkonce"; + return ".rdata$linkonce"; +} + + +const MCSection *TargetLoweringObjectFileCOFF:: +SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + assert(!Kind.isThreadLocal() && "Doesn't support TLS"); + + // If this global is linkonce/weak and the target handles this by emitting it + // into a 'uniqued' section name, create and return the section now. + if (GV->isWeakForLinker()) { + const char *Prefix = getCOFFSectionPrefixForUniqueGlobal(Kind); + SmallString<128> Name(Prefix, Prefix+strlen(Prefix)); + Mang->getNameWithPrefix(Name, GV, false); + return getCOFFSection(Name.str(), false, Kind); + } + + if (Kind.isText()) + return getTextSection(); + + return getDataSection(); +} + diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index a3f6364..6f4ca82 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -213,6 +213,9 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, unsigned NumVisited = 0; for (MachineBasicBlock::iterator I = llvm::next(OldPos); I != KillPos; ++I) { MachineInstr *OtherMI = I; + // DBG_VALUE cannot be counted against the limit. + if (OtherMI->isDebugValue()) + continue; if (NumVisited > 30) // FIXME: Arbitrary limit to reduce compile time cost. 
return false; ++NumVisited; @@ -316,7 +319,7 @@ bool TwoAddressInstructionPass::NoUseAfterLastDef(unsigned Reg, E = MRI->reg_end(); I != E; ++I) { MachineOperand &MO = I.getOperand(); MachineInstr *MI = MO.getParent(); - if (MI->getParent() != MBB) + if (MI->getParent() != MBB || MI->isDebugValue()) continue; DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI); if (DI == DistanceMap.end()) @@ -339,7 +342,7 @@ MachineInstr *TwoAddressInstructionPass::FindLastUseInMBB(unsigned Reg, E = MRI->reg_end(); I != E; ++I) { MachineOperand &MO = I.getOperand(); MachineInstr *MI = MO.getParent(); - if (MI->getParent() != MBB) + if (MI->getParent() != MBB || MI->isDebugValue()) continue; DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI); if (DI == DistanceMap.end()) @@ -365,13 +368,13 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII, DstReg = 0; unsigned SrcSubIdx, DstSubIdx; if (!TII->isMoveInstr(MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) { - if (MI.getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) { + if (MI.isExtractSubreg()) { DstReg = MI.getOperand(0).getReg(); SrcReg = MI.getOperand(1).getReg(); - } else if (MI.getOpcode() == TargetInstrInfo::INSERT_SUBREG) { + } else if (MI.isInsertSubreg()) { DstReg = MI.getOperand(0).getReg(); SrcReg = MI.getOperand(2).getReg(); - } else if (MI.getOpcode() == TargetInstrInfo::SUBREG_TO_REG) { + } else if (MI.isSubregToReg()) { DstReg = MI.getOperand(0).getReg(); SrcReg = MI.getOperand(2).getReg(); } @@ -429,8 +432,7 @@ static bool isKilled(MachineInstr &MI, unsigned Reg, /// as a two-address use. If so, return the destination register by reference. static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) { const TargetInstrDesc &TID = MI.getDesc(); - unsigned NumOps = (MI.getOpcode() == TargetInstrInfo::INLINEASM) - ? MI.getNumOperands() : TID.getNumOperands(); + unsigned NumOps = MI.isInlineAsm() ? MI.getNumOperands():TID.getNumOperands(); for (unsigned i = 0; i != NumOps; ++i) { const MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg) @@ -452,11 +454,11 @@ MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB, const TargetInstrInfo *TII, bool &IsCopy, unsigned &DstReg, bool &IsDstPhys) { - MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg); - if (UI == MRI->use_end()) + MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(Reg); + if (UI == MRI->use_nodbg_end()) return 0; MachineInstr &UseMI = *UI; - if (++UI != MRI->use_end()) + if (++UI != MRI->use_nodbg_end()) // More than one use. return 0; if (UseMI.getParent() != MBB) @@ -924,6 +926,10 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end(); mi != me; ) { MachineBasicBlock::iterator nmi = llvm::next(mi); + if (mi->isDebugValue()) { + mi = nmi; + continue; + } const TargetInstrDesc &TID = mi->getDesc(); bool FirstTied = true; @@ -933,7 +939,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { // First scan through all the tied register uses in this instruction // and record a list of pairs of tied operands for each register. - unsigned NumOps = (mi->getOpcode() == TargetInstrInfo::INLINEASM) + unsigned NumOps = mi->isInlineAsm() ? 
mi->getNumOperands() : TID.getNumOperands(); for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) { unsigned DstIdx = 0; diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp index 6ab5db2..b0f0a07 100644 --- a/lib/CodeGen/UnreachableBlockElim.cpp +++ b/lib/CodeGen/UnreachableBlockElim.cpp @@ -148,8 +148,7 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { MachineBasicBlock* succ = *BB->succ_begin(); MachineBasicBlock::iterator start = succ->begin(); - while (start != succ->end() && - start->getOpcode() == TargetInstrInfo::PHI) { + while (start != succ->end() && start->isPHI()) { for (unsigned i = start->getNumOperands() - 1; i >= 2; i-=2) if (start->getOperand(i).isMBB() && start->getOperand(i).getMBB() == BB) { @@ -188,8 +187,7 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { SmallPtrSet<MachineBasicBlock*, 8> preds(BB->pred_begin(), BB->pred_end()); MachineBasicBlock::iterator phi = BB->begin(); - while (phi != BB->end() && - phi->getOpcode() == TargetInstrInfo::PHI) { + while (phi != BB->end() && phi->isPHI()) { for (unsigned i = phi->getNumOperands() - 1; i >= 2; i-=2) if (!preds.count(phi->getOperand(i).getMBB())) { phi->RemoveOperand(i); diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index d4fb2e4..5956b61 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -9,7 +9,7 @@ // // This file implements the VirtRegMap class. // -// It also contains implementations of the the Spiller interface, which, given a +// It also contains implementations of the Spiller interface, which, given a // virtual register map and a machine function, eliminates all virtual // references by replacing them with physical register references - adding spill // code as necessary. diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp index df2b8d2..84e0398 100644 --- a/lib/CodeGen/VirtRegRewriter.cpp +++ b/lib/CodeGen/VirtRegRewriter.cpp @@ -62,6 +62,7 @@ VirtRegRewriter::~VirtRegRewriter() {} /// substitutePhysReg - Replace virtual register in MachineOperand with a /// physical register. Do the right thing with the sub-register index. +/// Note that operands may be added, so the MO reference is no longer valid. static void substitutePhysReg(MachineOperand &MO, unsigned Reg, const TargetRegisterInfo &TRI) { if (unsigned SubIdx = MO.getSubReg()) { @@ -123,14 +124,15 @@ struct TrivialRewriter : public VirtRegRewriter { continue; unsigned pReg = VRM.getPhys(reg); mri->setPhysRegUsed(pReg); - for (MachineRegisterInfo::reg_iterator regItr = mri->reg_begin(reg), - regEnd = mri->reg_end(); regItr != regEnd;) { - MachineOperand &mop = regItr.getOperand(); - assert(mop.isReg() && mop.getReg() == reg && "reg_iterator broken?"); - ++regItr; - substitutePhysReg(mop, pReg, *tri); - changed = true; - } + // Copy the register use-list before traversing it. + SmallVector<std::pair<MachineInstr*, unsigned>, 32> reglist; + for (MachineRegisterInfo::reg_iterator I = mri->reg_begin(reg), + E = mri->reg_end(); I != E; ++I) + reglist.push_back(std::make_pair(&*I, I.getOperandNo())); + for (unsigned N=0; N != reglist.size(); ++N) + substitutePhysReg(reglist[N].first->getOperand(reglist[N].second), + pReg, *tri); + changed |= !reglist.empty(); } } @@ -1759,7 +1761,7 @@ private: // Mark is killed. 
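
The TrivialRewriter fix above snapshots a register's use list before rewriting it, because substitutePhysReg can add operands and thereby invalidate live iterators. The same defensive pattern with standard containers, where Operand is a hypothetical stand-in:

#include <list>
#include <vector>

struct Operand { int Reg; };

// If rewriting an operand can add or remove entries in the use list
// (substitutePhysReg may add operands), iterating the list directly is
// unsafe. Snapshot the entries first and walk the copy instead.
static void rewriteAllUses(std::list<Operand *> &Uses,
                           void (*Rewrite)(Operand *, std::list<Operand *> &)) {
  std::vector<Operand *> Snapshot(Uses.begin(), Uses.end());
  for (Operand *O : Snapshot)
    Rewrite(O, Uses); // may mutate Uses; the snapshot is unaffected
}
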
MachineInstr *CopyMI = prior(InsertLoc); - CopyMI->setAsmPrinterFlag(AsmPrinter::ReloadReuse); + CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse); MachineOperand *KillOpnd = CopyMI->findRegisterUseOperand(InReg); KillOpnd->setIsKill(); UpdateKills(*CopyMI, TRI, RegKills, KillOps); @@ -1850,31 +1852,30 @@ private: KilledMIRegs.clear(); for (unsigned j = 0, e = VirtUseOps.size(); j != e; ++j) { unsigned i = VirtUseOps[j]; - MachineOperand &MO = MI.getOperand(i); - unsigned VirtReg = MO.getReg(); + unsigned VirtReg = MI.getOperand(i).getReg(); assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Not a virtual register?"); - unsigned SubIdx = MO.getSubReg(); + unsigned SubIdx = MI.getOperand(i).getSubReg(); if (VRM.isAssignedReg(VirtReg)) { // This virtual register was assigned a physreg! unsigned Phys = VRM.getPhys(VirtReg); RegInfo->setPhysRegUsed(Phys); - if (MO.isDef()) + if (MI.getOperand(i).isDef()) ReusedOperands.markClobbered(Phys); - substitutePhysReg(MO, Phys, *TRI); + substitutePhysReg(MI.getOperand(i), Phys, *TRI); if (VRM.isImplicitlyDefined(VirtReg)) // FIXME: Is this needed? BuildMI(MBB, &MI, MI.getDebugLoc(), - TII->get(TargetInstrInfo::IMPLICIT_DEF), Phys); + TII->get(TargetOpcode::IMPLICIT_DEF), Phys); continue; } // This virtual register is now known to be a spilled value. - if (!MO.isUse()) + if (!MI.getOperand(i).isUse()) continue; // Handle defs in the loop below (handle use&def here though) - bool AvoidReload = MO.isUndef(); + bool AvoidReload = MI.getOperand(i).isUndef(); // Check if it is defined by an implicit def. It should not be spilled. // Note, this is for correctness reason. e.g. // 8 %reg1024<def> = IMPLICIT_DEF @@ -1902,8 +1903,7 @@ private: // = EXTRACT_SUBREG fi#1 // fi#1 is available in EDI, but it cannot be reused because it's not in // the right register file. - if (PhysReg && !AvoidReload && - (SubIdx || MI.getOpcode() == TargetInstrInfo::EXTRACT_SUBREG)) { + if (PhysReg && !AvoidReload && (SubIdx || MI.isExtractSubreg())) { const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg); if (!RC->contains(PhysReg)) PhysReg = 0; @@ -2038,7 +2038,7 @@ private: TII->copyRegToReg(MBB, InsertLoc, DesignatedReg, PhysReg, RC, RC); MachineInstr *CopyMI = prior(InsertLoc); - CopyMI->setAsmPrinterFlag(AsmPrinter::ReloadReuse); + CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse); UpdateKills(*CopyMI, TRI, RegKills, KillOps); // This invalidates DesignatedReg. @@ -2167,7 +2167,7 @@ private: // virtual or needing to clobber any values if it's physical). NextMII = &MI; --NextMII; // backtrack to the copy. - NextMII->setAsmPrinterFlag(AsmPrinter::ReloadReuse); + NextMII->setAsmPrinterFlag(MachineInstr::ReloadReuse); // Propagate the sub-register index over. 
if (SubIdx) { DefMO = NextMII->findRegisterDefOperand(DestReg); diff --git a/lib/CompilerDriver/CompilationGraph.cpp b/lib/CompilerDriver/CompilationGraph.cpp index 56e5b9c..524607b 100644 --- a/lib/CompilerDriver/CompilationGraph.cpp +++ b/lib/CompilerDriver/CompilationGraph.cpp @@ -33,9 +33,11 @@ using namespace llvmc; namespace llvmc { const std::string& LanguageMap::GetLanguage(const sys::Path& File) const { - LanguageMap::const_iterator Lang = this->find(File.getSuffix()); + StringRef suf = File.getSuffix(); + LanguageMap::const_iterator Lang = this->find(suf); if (Lang == this->end()) - throw std::runtime_error(("Unknown suffix: " + File.getSuffix()).str()); + throw std::runtime_error("File '" + File.str() + + "' has unknown suffix '" + suf.str() + '\''); return Lang->second; } } diff --git a/lib/CompilerDriver/Makefile b/lib/CompilerDriver/Makefile index a5ecfd5..66c6d11 100644 --- a/lib/CompilerDriver/Makefile +++ b/lib/CompilerDriver/Makefile @@ -22,6 +22,7 @@ else endif REQUIRES_EH := 1 +REQUIRES_RTTI := 1 include $(LEVEL)/Makefile.common diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index 89c4290..6db3ef9 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -18,7 +18,6 @@ #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" -#include "llvm/ModuleProvider.h" #include "llvm/ExecutionEngine/GenericValue.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/Debug.h" @@ -36,25 +35,29 @@ using namespace llvm; STATISTIC(NumInitBytes, "Number of bytes of global vars initialized"); STATISTIC(NumGlobals , "Number of global vars initialized"); -ExecutionEngine *(*ExecutionEngine::JITCtor)(ModuleProvider *MP, - std::string *ErrorStr, - JITMemoryManager *JMM, - CodeGenOpt::Level OptLevel, - bool GVsWithCode, - CodeModel::Model CMM) = 0; -ExecutionEngine *(*ExecutionEngine::InterpCtor)(ModuleProvider *MP, +ExecutionEngine *(*ExecutionEngine::JITCtor)( + Module *M, + std::string *ErrorStr, + JITMemoryManager *JMM, + CodeGenOpt::Level OptLevel, + bool GVsWithCode, + CodeModel::Model CMM, + StringRef MArch, + StringRef MCPU, + const SmallVectorImpl<std::string>& MAttrs) = 0; +ExecutionEngine *(*ExecutionEngine::InterpCtor)(Module *M, std::string *ErrorStr) = 0; ExecutionEngine::EERegisterFn ExecutionEngine::ExceptionTableRegister = 0; -ExecutionEngine::ExecutionEngine(ModuleProvider *P) +ExecutionEngine::ExecutionEngine(Module *M) : EEState(*this), LazyFunctionCreator(0) { CompilingLazily = false; GVCompilationDisabled = false; SymbolSearchingDisabled = false; - Modules.push_back(P); - assert(P && "ModuleProvider is null?"); + Modules.push_back(M); + assert(M && "Module is null?"); } ExecutionEngine::~ExecutionEngine() { @@ -69,38 +72,18 @@ char* ExecutionEngine::getMemoryForGV(const GlobalVariable* GV) { return new char[GVSize]; } -/// removeModuleProvider - Remove a ModuleProvider from the list of modules. -/// Relases the Module from the ModuleProvider, materializing it in the -/// process, and returns the materialized Module. -Module* ExecutionEngine::removeModuleProvider(ModuleProvider *P, - std::string *ErrInfo) { - for(SmallVector<ModuleProvider *, 1>::iterator I = Modules.begin(), +/// removeModule - Remove a Module from the list of modules. 
+bool ExecutionEngine::removeModule(Module *M) { + for(SmallVector<Module *, 1>::iterator I = Modules.begin(), E = Modules.end(); I != E; ++I) { - ModuleProvider *MP = *I; - if (MP == P) { + Module *Found = *I; + if (Found == M) { Modules.erase(I); - clearGlobalMappingsFromModule(MP->getModule()); - return MP->releaseModule(ErrInfo); - } - } - return NULL; -} - -/// deleteModuleProvider - Remove a ModuleProvider from the list of modules, -/// and deletes the ModuleProvider and owned Module. Avoids materializing -/// the underlying module. -void ExecutionEngine::deleteModuleProvider(ModuleProvider *P, - std::string *ErrInfo) { - for(SmallVector<ModuleProvider *, 1>::iterator I = Modules.begin(), - E = Modules.end(); I != E; ++I) { - ModuleProvider *MP = *I; - if (MP == P) { - Modules.erase(I); - clearGlobalMappingsFromModule(MP->getModule()); - delete MP; - return; + clearGlobalMappingsFromModule(M); + return true; } } + return false; } /// FindFunctionNamed - Search all of the active modules to find the one that @@ -108,7 +91,7 @@ void ExecutionEngine::deleteModuleProvider(ModuleProvider *P, /// general code. Function *ExecutionEngine::FindFunctionNamed(const char *FnName) { for (unsigned i = 0, e = Modules.size(); i != e; ++i) { - if (Function *F = Modules[i]->getModule()->getFunction(FnName)) + if (Function *F = Modules[i]->getFunction(FnName)) return F; } return 0; @@ -316,7 +299,7 @@ void ExecutionEngine::runStaticConstructorsDestructors(Module *module, void ExecutionEngine::runStaticConstructorsDestructors(bool isDtors) { // Execute global ctors/dtors for each module in the program. for (unsigned m = 0, e = Modules.size(); m != e; ++m) - runStaticConstructorsDestructors(Modules[m]->getModule(), isDtors); + runStaticConstructorsDestructors(Modules[m], isDtors); } #ifndef NDEBUG @@ -356,7 +339,7 @@ int ExecutionEngine::runFunctionAsMain(Function *Fn, } // FALLS THROUGH case 1: - if (!FTy->getParamType(0)->isInteger(32)) { + if (!FTy->getParamType(0)->isIntegerTy(32)) { llvm_report_error("Invalid type for first argument of main() supplied"); } // FALLS THROUGH @@ -393,12 +376,12 @@ int ExecutionEngine::runFunctionAsMain(Function *Fn, /// Interpreter or there's an error. If even an Interpreter cannot be created, /// NULL is returned. /// -ExecutionEngine *ExecutionEngine::create(ModuleProvider *MP, +ExecutionEngine *ExecutionEngine::create(Module *M, bool ForceInterpreter, std::string *ErrorStr, CodeGenOpt::Level OptLevel, bool GVsWithCode) { - return EngineBuilder(MP) + return EngineBuilder(M) .setEngineKind(ForceInterpreter ? EngineKind::Interpreter : EngineKind::JIT) @@ -408,16 +391,6 @@ ExecutionEngine *ExecutionEngine::create(ModuleProvider *MP, .create(); } -ExecutionEngine *ExecutionEngine::create(Module *M) { - return EngineBuilder(M).create(); -} - -/// EngineBuilder - Overloaded constructor that automatically creates an -/// ExistingModuleProvider for an existing module. -EngineBuilder::EngineBuilder(Module *m) : MP(new ExistingModuleProvider(m)) { - InitEngine(); -} - ExecutionEngine *EngineBuilder::create() { // Make sure we can resolve symbols in the program as well. The zero arg // to the function tells DynamicLibrary to load the program, not a library. 
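
The new removeModule above is a plain find-and-erase over a small vector, returning whether anything was removed. The equivalent shape with the standard library, using an opaque stand-in Module type:

#include <algorithm>
#include <vector>

struct Module; // opaque stand-in; ownership stays with the caller

// Find-and-erase over a small vector of pointers; returns true if M was
// present, mirroring the contract of ExecutionEngine::removeModule.
static bool removeModule(std::vector<Module *> &Modules, Module *M) {
  std::vector<Module *>::iterator It =
      std::find(Modules.begin(), Modules.end(), M);
  if (It == Modules.end())
    return false;
  Modules.erase(It);
  return true;
}
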
@@ -442,8 +415,9 @@ ExecutionEngine *EngineBuilder::create() { if (WhichEngine & EngineKind::JIT) { if (ExecutionEngine::JITCtor) { ExecutionEngine *EE = - ExecutionEngine::JITCtor(MP, ErrorStr, JMM, OptLevel, - AllocateGVsWithCode, CMModel); + ExecutionEngine::JITCtor(M, ErrorStr, JMM, OptLevel, + AllocateGVsWithCode, CMModel, + MArch, MCPU, MAttrs); if (EE) return EE; } } @@ -452,7 +426,7 @@ ExecutionEngine *EngineBuilder::create() { // an interpreter instead. if (WhichEngine & EngineKind::Interpreter) { if (ExecutionEngine::InterpCtor) - return ExecutionEngine::InterpCtor(MP, ErrorStr); + return ExecutionEngine::InterpCtor(M, ErrorStr); if (ErrorStr) *ErrorStr = "Interpreter has not been linked in."; return 0; @@ -625,18 +599,18 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { switch (Op0->getType()->getTypeID()) { default: llvm_unreachable("Invalid bitcast operand"); case Type::IntegerTyID: - assert(DestTy->isFloatingPoint() && "invalid bitcast"); + assert(DestTy->isFloatingPointTy() && "invalid bitcast"); if (DestTy->isFloatTy()) GV.FloatVal = GV.IntVal.bitsToFloat(); else if (DestTy->isDoubleTy()) GV.DoubleVal = GV.IntVal.bitsToDouble(); break; case Type::FloatTyID: - assert(DestTy->isInteger(32) && "Invalid bitcast"); + assert(DestTy->isIntegerTy(32) && "Invalid bitcast"); GV.IntVal.floatToBits(GV.FloatVal); break; case Type::DoubleTyID: - assert(DestTy->isInteger(64) && "Invalid bitcast"); + assert(DestTy->isIntegerTy(64) && "Invalid bitcast"); GV.IntVal.doubleToBits(GV.DoubleVal); break; case Type::PointerTyID: @@ -968,7 +942,7 @@ void ExecutionEngine::emitGlobals() { if (Modules.size() != 1) { for (unsigned m = 0, e = Modules.size(); m != e; ++m) { - Module &M = *Modules[m]->getModule(); + Module &M = *Modules[m]; for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) { const GlobalValue *GV = I; @@ -1002,7 +976,7 @@ void ExecutionEngine::emitGlobals() { std::vector<const GlobalValue*> NonCanonicalGlobals; for (unsigned m = 0, e = Modules.size(); m != e; ++m) { - Module &M = *Modules[m]->getModule(); + Module &M = *Modules[m]; for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) { // In the multi-module case, see what this global maps to. 
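That concludes the core ExecutionEngine.cpp changes. Since JITCtor now threads the -march/-mcpu/-mattr selections through EngineBuilder instead of reading global cl::opts, an embedder selects the target when building the engine. A hedged sketch: the setMArch/setMCPU/setMAttrs setters are inferred from the new JITCtor signature above and are not shown in these hunks, and the arch, cpu, and attribute strings are placeholders.

  #include "llvm/Module.h"
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/ExecutionEngine/ExecutionEngine.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  ExecutionEngine *makeJIT(Module *M) {
    SmallVector<std::string, 2> Attrs;
    Attrs.push_back("+sse2");                    // placeholder attribute
    std::string Err;
    ExecutionEngine *EE = EngineBuilder(M)
                              .setEngineKind(EngineKind::JIT)
                              .setErrorStr(&Err)
                              .setMArch("x86")   // placeholder arch
                              .setMCPU("core2")  // placeholder cpu
                              .setMAttrs(Attrs)
                              .create();
    if (!EE)
      errs() << "engine creation failed: " << Err << "\n";
    return EE;
  }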
diff --git a/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/lib/ExecutionEngine/ExecutionEngineBindings.cpp index 412b493..141cb27 100644 --- a/lib/ExecutionEngine/ExecutionEngineBindings.cpp +++ b/lib/ExecutionEngine/ExecutionEngineBindings.cpp @@ -174,20 +174,16 @@ void LLVMFreeMachineCodeForFunction(LLVMExecutionEngineRef EE, LLVMValueRef F) { } void LLVMAddModuleProvider(LLVMExecutionEngineRef EE, LLVMModuleProviderRef MP){ - unwrap(EE)->addModuleProvider(unwrap(MP)); + unwrap(EE)->addModule(unwrap(MP)); } LLVMBool LLVMRemoveModuleProvider(LLVMExecutionEngineRef EE, LLVMModuleProviderRef MP, LLVMModuleRef *OutMod, char **OutError) { - std::string Error; - if (Module *Gone = unwrap(EE)->removeModuleProvider(unwrap(MP), &Error)) { - *OutMod = wrap(Gone); - return 0; - } - if (OutError) - *OutError = strdup(Error.c_str()); - return 1; + Module *M = unwrap(MP); + unwrap(EE)->removeModule(M); + *OutMod = wrap(M); + return 0; } LLVMBool LLVMFindFunction(LLVMExecutionEngineRef EE, const char *Name, diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp index 73f5558..e234cf1 100644 --- a/lib/ExecutionEngine/Interpreter/Execution.cpp +++ b/lib/ExecutionEngine/Interpreter/Execution.cpp @@ -591,7 +591,7 @@ void Interpreter::popStackAndReturnValueToCaller(const Type *RetTy, ECStack.pop_back(); if (ECStack.empty()) { // Finished main. Put result into exit code... - if (RetTy && RetTy->isInteger()) { // Nonvoid return type? + if (RetTy && RetTy->isIntegerTy()) { // Nonvoid return type? ExitValue = Result; // Capture the exit value of the program } else { memset(&ExitValue.Untyped, 0, sizeof(ExitValue.Untyped)); @@ -979,7 +979,7 @@ GenericValue Interpreter::executeFPToUIInst(Value *SrcVal, const Type *DstTy, const Type *SrcTy = SrcVal->getType(); uint32_t DBitWidth = cast<IntegerType>(DstTy)->getBitWidth(); GenericValue Dest, Src = getOperandValue(SrcVal, SF); - assert(SrcTy->isFloatingPoint() && "Invalid FPToUI instruction"); + assert(SrcTy->isFloatingPointTy() && "Invalid FPToUI instruction"); if (SrcTy->getTypeID() == Type::FloatTyID) Dest.IntVal = APIntOps::RoundFloatToAPInt(Src.FloatVal, DBitWidth); @@ -993,7 +993,7 @@ GenericValue Interpreter::executeFPToSIInst(Value *SrcVal, const Type *DstTy, const Type *SrcTy = SrcVal->getType(); uint32_t DBitWidth = cast<IntegerType>(DstTy)->getBitWidth(); GenericValue Dest, Src = getOperandValue(SrcVal, SF); - assert(SrcTy->isFloatingPoint() && "Invalid FPToSI instruction"); + assert(SrcTy->isFloatingPointTy() && "Invalid FPToSI instruction"); if (SrcTy->getTypeID() == Type::FloatTyID) Dest.IntVal = APIntOps::RoundFloatToAPInt(Src.FloatVal, DBitWidth); @@ -1005,7 +1005,7 @@ GenericValue Interpreter::executeFPToSIInst(Value *SrcVal, const Type *DstTy, GenericValue Interpreter::executeUIToFPInst(Value *SrcVal, const Type *DstTy, ExecutionContext &SF) { GenericValue Dest, Src = getOperandValue(SrcVal, SF); - assert(DstTy->isFloatingPoint() && "Invalid UIToFP instruction"); + assert(DstTy->isFloatingPointTy() && "Invalid UIToFP instruction"); if (DstTy->getTypeID() == Type::FloatTyID) Dest.FloatVal = APIntOps::RoundAPIntToFloat(Src.IntVal); @@ -1017,7 +1017,7 @@ GenericValue Interpreter::executeUIToFPInst(Value *SrcVal, const Type *DstTy, GenericValue Interpreter::executeSIToFPInst(Value *SrcVal, const Type *DstTy, ExecutionContext &SF) { GenericValue Dest, Src = getOperandValue(SrcVal, SF); - assert(DstTy->isFloatingPoint() && "Invalid SIToFP instruction"); + assert(DstTy->isFloatingPointTy() && "Invalid 
SIToFP instruction"); if (DstTy->getTypeID() == Type::FloatTyID) Dest.FloatVal = APIntOps::RoundSignedAPIntToFloat(Src.IntVal); @@ -1058,24 +1058,24 @@ GenericValue Interpreter::executeBitCastInst(Value *SrcVal, const Type *DstTy, if (isa<PointerType>(DstTy)) { assert(isa<PointerType>(SrcTy) && "Invalid BitCast"); Dest.PointerVal = Src.PointerVal; - } else if (DstTy->isInteger()) { + } else if (DstTy->isIntegerTy()) { if (SrcTy->isFloatTy()) { Dest.IntVal.zext(sizeof(Src.FloatVal) * CHAR_BIT); Dest.IntVal.floatToBits(Src.FloatVal); } else if (SrcTy->isDoubleTy()) { Dest.IntVal.zext(sizeof(Src.DoubleVal) * CHAR_BIT); Dest.IntVal.doubleToBits(Src.DoubleVal); - } else if (SrcTy->isInteger()) { + } else if (SrcTy->isIntegerTy()) { Dest.IntVal = Src.IntVal; } else llvm_unreachable("Invalid BitCast"); } else if (DstTy->isFloatTy()) { - if (SrcTy->isInteger()) + if (SrcTy->isIntegerTy()) Dest.FloatVal = Src.IntVal.bitsToFloat(); else Dest.FloatVal = Src.FloatVal; } else if (DstTy->isDoubleTy()) { - if (SrcTy->isInteger()) + if (SrcTy->isIntegerTy()) Dest.DoubleVal = Src.IntVal.bitsToDouble(); else Dest.DoubleVal = Src.DoubleVal; diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp index c02d84f..7b061d3 100644 --- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp +++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp @@ -368,7 +368,7 @@ GenericValue lle_X_sprintf(const FunctionType *FT, switch (Last) { case '%': - strcpy(Buffer, "%"); break; + memcpy(Buffer, "%", 2); break; case 'c': sprintf(Buffer, FmtBuf, uint32_t(Args[ArgNo++].IntVal.getZExtValue())); break; @@ -400,8 +400,9 @@ GenericValue lle_X_sprintf(const FunctionType *FT, errs() << "<unknown printf code '" << *FmtStr << "'!>"; ArgNo++; break; } - strcpy(OutputBuffer, Buffer); - OutputBuffer += strlen(Buffer); + size_t Len = strlen(Buffer); + memcpy(OutputBuffer, Buffer, Len + 1); + OutputBuffer += Len; } break; } diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.cpp b/lib/ExecutionEngine/Interpreter/Interpreter.cpp index 9be6a92..43e3453 100644 --- a/lib/ExecutionEngine/Interpreter/Interpreter.cpp +++ b/lib/ExecutionEngine/Interpreter/Interpreter.cpp @@ -17,7 +17,6 @@ #include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" -#include "llvm/ModuleProvider.h" #include <cstring> using namespace llvm; @@ -33,20 +32,20 @@ extern "C" void LLVMLinkInInterpreter() { } /// create - Create a new interpreter object. This can never fail. /// -ExecutionEngine *Interpreter::create(ModuleProvider *MP, std::string* ErrStr) { - // Tell this ModuleProvide to materialize and release the module - if (!MP->materializeModule(ErrStr)) +ExecutionEngine *Interpreter::create(Module *M, std::string* ErrStr) { + // Tell this Module to materialize everything and release the GVMaterializer. 
+ if (M->MaterializeAllPermanently(ErrStr)) // We got an error, just return 0 return 0; - return new Interpreter(MP); + return new Interpreter(M); } //===----------------------------------------------------------------------===// // Interpreter ctor - Initialize stuff // -Interpreter::Interpreter(ModuleProvider *M) - : ExecutionEngine(M), TD(M->getModule()) { +Interpreter::Interpreter(Module *M) + : ExecutionEngine(M), TD(M) { memset(&ExitValue.Untyped, 0, sizeof(ExitValue.Untyped)); setTargetData(&TD); diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.h b/lib/ExecutionEngine/Interpreter/Interpreter.h index 038830c..bc4200b 100644 --- a/lib/ExecutionEngine/Interpreter/Interpreter.h +++ b/lib/ExecutionEngine/Interpreter/Interpreter.h @@ -94,7 +94,7 @@ class Interpreter : public ExecutionEngine, public InstVisitor<Interpreter> { std::vector<Function*> AtExitHandlers; public: - explicit Interpreter(ModuleProvider *M); + explicit Interpreter(Module *M); ~Interpreter(); /// runAtExitHandlers - Run any functions registered by the program's calls to @@ -108,7 +108,7 @@ public: /// create - Create an interpreter ExecutionEngine. This can never fail. /// - static ExecutionEngine *create(ModuleProvider *M, std::string *ErrorStr = 0); + static ExecutionEngine *create(Module *M, std::string *ErrorStr = 0); /// run - Start execution with the specified function and arguments. /// diff --git a/lib/ExecutionEngine/Interpreter/Makefile b/lib/ExecutionEngine/Interpreter/Makefile index 4df38ea..5def136 100644 --- a/lib/ExecutionEngine/Interpreter/Makefile +++ b/lib/ExecutionEngine/Interpreter/Makefile @@ -9,6 +9,5 @@ LEVEL = ../../.. LIBRARYNAME = LLVMInterpreter -CXXFLAGS = -fno-rtti include $(LEVEL)/Makefile.common diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp index faf724f..18a996e 100644 --- a/lib/ExecutionEngine/JIT/JIT.cpp +++ b/lib/ExecutionEngine/JIT/JIT.cpp @@ -18,7 +18,7 @@ #include "llvm/Function.h" #include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" -#include "llvm/ModuleProvider.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/CodeGen/JITCodeEmitter.h" #include "llvm/CodeGen/MachineCodeInfo.h" #include "llvm/ExecutionEngine/GenericValue.h" @@ -28,6 +28,7 @@ #include "llvm/Target/TargetJITInfo.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MutexGuard.h" #include "llvm/System/DynamicLibrary.h" #include "llvm/Config/config.h" @@ -172,7 +173,7 @@ void DarwinRegisterFrame(void* FrameBegin) { ob->encoding.i = 0; ob->encoding.b.encoding = llvm::dwarf::DW_EH_PE_omit; - // Put the info on both places, as libgcc uses the first or the the second + // Put the info on both places, as libgcc uses the first or the second // field. Note that we rely on having two pointers here. If fde_end was a // char, things would get complicated. ob->fde_end = (char*)LOI->unseenObjects; @@ -193,35 +194,44 @@ void DarwinRegisterFrame(void* FrameBegin) { /// createJIT - This is the factory method for creating a JIT for the current /// machine, it does not fall back to the interpreter. This takes ownership -/// of the module provider. -ExecutionEngine *ExecutionEngine::createJIT(ModuleProvider *MP, +/// of the module. 
+ExecutionEngine *ExecutionEngine::createJIT(Module *M,
 std::string *ErrorStr, JITMemoryManager *JMM, CodeGenOpt::Level OptLevel, bool GVsWithCode,
- CodeModel::Model CMM) {
- return JIT::createJIT(MP, ErrorStr, JMM, OptLevel, GVsWithCode, CMM);
+ CodeModel::Model CMM) {
+ // Use the defaults for extra parameters. Users can use EngineBuilder to
+ // set them.
+ StringRef MArch = "";
+ StringRef MCPU = "";
+ SmallVector<std::string, 1> MAttrs;
+ return JIT::createJIT(M, ErrorStr, JMM, OptLevel, GVsWithCode, CMM,
+ MArch, MCPU, MAttrs);
}

-ExecutionEngine *JIT::createJIT(ModuleProvider *MP,
+ExecutionEngine *JIT::createJIT(Module *M,
 std::string *ErrorStr, JITMemoryManager *JMM, CodeGenOpt::Level OptLevel, bool GVsWithCode,
- CodeModel::Model CMM) {
+ CodeModel::Model CMM,
+ StringRef MArch,
+ StringRef MCPU,
+ const SmallVectorImpl<std::string>& MAttrs) {
 // Make sure we can resolve symbols in the program as well. The zero arg
 // to the function tells DynamicLibrary to load the program, not a library.
 if (sys::DynamicLibrary::LoadLibraryPermanently(0, ErrorStr))
 return 0;

 // Pick a target either via -march or by guessing the native arch.
- TargetMachine *TM = JIT::selectTarget(MP, ErrorStr);
+ TargetMachine *TM = JIT::selectTarget(M, MArch, MCPU, MAttrs, ErrorStr);
 if (!TM || (ErrorStr && ErrorStr->length() > 0)) return 0;
 TM->setCodeModel(CMM);

 // If the target supports JIT code generation, create the JIT.
 if (TargetJITInfo *TJ = TM->getJITInfo()) {
- return new JIT(MP, *TM, *TJ, JMM, OptLevel, GVsWithCode);
+ return new JIT(M, *TM, *TJ, JMM, OptLevel, GVsWithCode);
 } else {
 if (ErrorStr)
 *ErrorStr = "target does not support JIT code generation";
@@ -229,16 +239,63 @@ ExecutionEngine *JIT::createJIT(ModuleProvider *MP,
 }
}

-JIT::JIT(ModuleProvider *MP, TargetMachine &tm, TargetJITInfo &tji,
+namespace {
+/// This class supports the global getPointerToNamedFunction(), which allows
+/// bugpoint or gdb users to search for a function by name without any context.
+class JitPool {
+ SmallPtrSet<JIT*, 1> JITs; // Optimize for process containing just 1 JIT.
+ mutable sys::Mutex Lock;
+public:
+ void Add(JIT *jit) {
+ MutexGuard guard(Lock);
+ JITs.insert(jit);
+ }
+ void Remove(JIT *jit) {
+ MutexGuard guard(Lock);
+ JITs.erase(jit);
+ }
+ void *getPointerToNamedFunction(const char *Name) const {
+ MutexGuard guard(Lock);
+ assert(JITs.size() != 0 && "No Jit registered");
+ // Search for the function in every instance of JIT.
+ for (SmallPtrSet<JIT*, 1>::const_iterator Jit = JITs.begin(),
+ end = JITs.end();
+ Jit != end; ++Jit) {
+ if (Function *F = (*Jit)->FindFunctionNamed(Name))
+ return (*Jit)->getPointerToFunction(F);
+ }
+ // The function is not available: fall back to the first created JIT (it
+ // will search the symbols of the current program/library).
+ return (*JITs.begin())->getPointerToNamedFunction(Name);
+ }
+};
+ManagedStatic<JitPool> AllJits;
+}
+extern "C" {
+ // getPointerToNamedFunction - This function is used as a global wrapper to
+ // JIT::getPointerToNamedFunction for the purpose of resolving symbols when
+ // bugpoint is debugging the JIT. In that scenario, we are loading an .so and
+ // need to resolve function(s) that are being mis-codegenerated, so we need to
+ // resolve their addresses at runtime, and this is the way to do it.
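Because the wrapper defined next has C linkage, a debugger or a tool that loads the JIT as a shared object can resolve it by name and probe every live JIT through the JitPool above. A hedged sketch of such a lookup; SearchForAddressOfSymbol is the stock sys::DynamicLibrary API of this tree, and the probed function name is a placeholder:

  #include "llvm/System/DynamicLibrary.h"

  typedef void *(*LookupFn)(const char *);
  LookupFn Lookup = (LookupFn)(intptr_t)
      llvm::sys::DynamicLibrary::SearchForAddressOfSymbol("getPointerToNamedFunction");
  if (Lookup) {
    void *Addr = Lookup("main");   // placeholder function name
    (void)Addr;
  }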
+ void *getPointerToNamedFunction(const char *Name) {
+ return AllJits->getPointerToNamedFunction(Name);
+ }
+}
+
+JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
 JITMemoryManager *JMM, CodeGenOpt::Level OptLevel, bool GVsWithCode)
- : ExecutionEngine(MP), TM(tm), TJI(tji), AllocateGVsWithCode(GVsWithCode) {
+ : ExecutionEngine(M), TM(tm), TJI(tji), AllocateGVsWithCode(GVsWithCode),
+ isAlreadyCodeGenerating(false) {
 setTargetData(TM.getTargetData());

- jitstate = new JITState(MP);
+ jitstate = new JITState(M);

 // Initialize JCE
 JCE = createEmitter(*this, JMM, TM);

+ // Register in the global list of all JITs.
+ AllJits->Add(this);
+
 // Add target data
 MutexGuard locked(lock);
 FunctionPassManager &PM = jitstate->getPM(locked);
@@ -273,21 +330,21 @@ JIT::JIT(ModuleProvider *MP, TargetMachine &tm, TargetJITInfo &tji,
 }

 JIT::~JIT() {
+ AllJits->Remove(this);
 delete jitstate;
 delete JCE;
 delete &TM;
}

-/// addModuleProvider - Add a new ModuleProvider to the JIT. If we previously
-/// removed the last ModuleProvider, we need re-initialize jitstate with a valid
-/// ModuleProvider.
-void JIT::addModuleProvider(ModuleProvider *MP) {
+/// addModule - Add a new Module to the JIT. If we previously removed the last
+/// Module, we need to re-initialize jitstate with a valid Module.
+void JIT::addModule(Module *M) {
 MutexGuard locked(lock);

 if (Modules.empty()) {
 assert(!jitstate && "jitstate should be NULL if Modules vector is empty!");

- jitstate = new JITState(MP);
+ jitstate = new JITState(M);

 FunctionPassManager &PM = jitstate->getPM(locked);
 PM.add(new TargetData(*TM.getTargetData()));
@@ -302,18 +359,17 @@ void JIT::addModuleProvider(ModuleProvider *MP) {
 PM.doInitialization();
 }

- ExecutionEngine::addModuleProvider(MP);
+ ExecutionEngine::addModule(M);
}

-/// removeModuleProvider - If we are removing the last ModuleProvider,
-/// invalidate the jitstate since the PassManager it contains references a
-/// released ModuleProvider.
-Module *JIT::removeModuleProvider(ModuleProvider *MP, std::string *E) {
- Module *result = ExecutionEngine::removeModuleProvider(MP, E);
+/// removeModule - If we are removing the last Module, invalidate the jitstate
+/// since the PassManager it contains references a released Module.
+bool JIT::removeModule(Module *M) {
+ bool result = ExecutionEngine::removeModule(M);

 MutexGuard locked(lock);

- if (jitstate->getMP() == MP) {
+ if (jitstate->getModule() == M) {
 delete jitstate;
 jitstate = 0;
 }
@@ -336,62 +392,6 @@ Module *JIT::removeModuleProvider(ModuleProvider *MP, std::string *E) {
 return result;
}

-/// deleteModuleProvider - Remove a ModuleProvider from the list of modules,
-/// and deletes the ModuleProvider and owned Module. Avoids materializing
-/// the underlying module.
-void JIT::deleteModuleProvider(ModuleProvider *MP, std::string *E) {
- ExecutionEngine::deleteModuleProvider(MP, E);
-
- MutexGuard locked(lock);
-
- if (jitstate->getMP() == MP) {
- delete jitstate;
- jitstate = 0;
- }
-
- if (!jitstate && !Modules.empty()) {
- jitstate = new JITState(Modules[0]);
-
- FunctionPassManager &PM = jitstate->getPM(locked);
- PM.add(new TargetData(*TM.getTargetData()));
-
- // Turn the machine code intermediate representation into bytes in memory
- // that may be executed.
- if (TM.addPassesToEmitMachineCode(PM, *JCE, CodeGenOpt::Default)) {
- llvm_report_error("Target does not support machine code emission!");
- }
-
- // Initialize passes.
- PM.doInitialization();
- }
-}
-
-/// materializeFunction - make sure the given function is fully read.
If the -/// module is corrupt, this returns true and fills in the optional string with -/// information about the problem. If successful, this returns false. -bool JIT::materializeFunction(Function *F, std::string *ErrInfo) { - // Read in the function if it exists in this Module. - if (F->hasNotBeenReadFromBitcode()) { - // Determine the module provider this function is provided by. - Module *M = F->getParent(); - ModuleProvider *MP = 0; - for (unsigned i = 0, e = Modules.size(); i != e; ++i) { - if (Modules[i]->getModule() == M) { - MP = Modules[i]; - break; - } - } - if (MP) - return MP->materializeFunction(F, ErrInfo); - - if (ErrInfo) - *ErrInfo = "Function isn't in a module we know about!"; - return true; - } - // Succeed if the function is already read. - return false; -} - /// run - Start execution with the specified function and arguments. /// GenericValue JIT::runFunction(Function *F, @@ -411,10 +411,10 @@ GenericValue JIT::runFunction(Function *F, // Handle some common cases first. These cases correspond to common `main' // prototypes. - if (RetTy->isInteger(32) || RetTy->isVoidTy()) { + if (RetTy->isIntegerTy(32) || RetTy->isVoidTy()) { switch (ArgValues.size()) { case 3: - if (FTy->getParamType(0)->isInteger(32) && + if (FTy->getParamType(0)->isIntegerTy(32) && isa<PointerType>(FTy->getParamType(1)) && isa<PointerType>(FTy->getParamType(2))) { int (*PF)(int, char **, const char **) = @@ -429,7 +429,7 @@ GenericValue JIT::runFunction(Function *F, } break; case 2: - if (FTy->getParamType(0)->isInteger(32) && + if (FTy->getParamType(0)->isIntegerTy(32) && isa<PointerType>(FTy->getParamType(1))) { int (*PF)(int, char **) = (int(*)(int, char **))(intptr_t)FPtr; @@ -442,7 +442,7 @@ GenericValue JIT::runFunction(Function *F, break; case 1: if (FTy->getNumParams() == 1 && - FTy->getParamType(0)->isInteger(32)) { + FTy->getParamType(0)->isIntegerTy(32)) { GenericValue rv; int (*PF)(int) = (int(*)(int))(intptr_t)FPtr; rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue())); @@ -553,8 +553,12 @@ GenericValue JIT::runFunction(Function *F, else ReturnInst::Create(F->getContext(), StubBB); // Just return void. - // Finally, return the value returned by our nullary stub function. - return runFunction(Stub, std::vector<GenericValue>()); + // Finally, call our nullary stub function. + GenericValue Result = runFunction(Stub, std::vector<GenericValue>()); + // Erase it, since no other function can have a reference to it. + Stub->eraseFromParent(); + // And return the result. + return Result; } void JIT::RegisterJITEventListener(JITEventListener *L) { @@ -620,7 +624,6 @@ void JIT::runJITOnFunction(Function *F, MachineCodeInfo *MCI) { } void JIT::runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked) { - static bool isAlreadyCodeGenerating = false; assert(!isAlreadyCodeGenerating && "Error: Recursive compilation detected!"); // JIT the function @@ -661,7 +664,7 @@ void *JIT::getPointerToFunction(Function *F) { // Now that this thread owns the lock, make sure we read in the function if it // exists in this Module. 
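Worth noting before the call below: Function::Materialize() subsumes the JIT::materializeFunction() removed above; a Function now reaches its own GVMaterializer through its parent Module, so the old search over ModuleProviders is unnecessary. The new idiom in isolation (the error wording is ours):

  std::string Err;
  if (F->Materialize(&Err))   // true if the bitcode is corrupt
    llvm_report_error("cannot materialize function: " + Err);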
std::string ErrorMsg;
- if (materializeFunction(F, &ErrorMsg)) {
+ if (F->Materialize(&ErrorMsg)) {
 llvm_report_error("Error reading function '" + F->getName()+
 "' from bitcode file: " + ErrorMsg);
 }
diff --git a/lib/ExecutionEngine/JIT/JIT.h b/lib/ExecutionEngine/JIT/JIT.h index b6f74ff..edae719 100644 --- a/lib/ExecutionEngine/JIT/JIT.h +++ b/lib/ExecutionEngine/JIT/JIT.h
@@ -30,20 +30,20 @@ class TargetMachine;
 class JITState {
 private:
 FunctionPassManager PM; // Passes to compile a function
- ModuleProvider *MP; // ModuleProvider used to create the PM
+ Module *M; // Module used to create the PM

 /// PendingFunctions - Functions which have not been code generated yet, but
 /// were called from a function being code generated.
 std::vector<AssertingVH<Function> > PendingFunctions;

public:
- explicit JITState(ModuleProvider *MP) : PM(MP), MP(MP) {}
+ explicit JITState(Module *M) : PM(M), M(M) {}

 FunctionPassManager &getPM(const MutexGuard &L) {
 return PM;
 }

- ModuleProvider *getMP() const { return MP; }
+ Module *getModule() const { return M; }
 std::vector<AssertingVH<Function> > &getPendingFunctions(const MutexGuard &L){
 return PendingFunctions;
 }
@@ -61,16 +61,20 @@ class JIT : public ExecutionEngine {
 /// should be set to true. Doing so breaks freeMachineCodeForFunction.
 bool AllocateGVsWithCode;

+ /// True while the JIT is generating code. Used to assert against recursive
+ /// entry.
+ bool isAlreadyCodeGenerating;
+
 JITState *jitstate;

- JIT(ModuleProvider *MP, TargetMachine &tm, TargetJITInfo &tji,
+ JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
 JITMemoryManager *JMM, CodeGenOpt::Level OptLevel,
 bool AllocateGVsWithCode);
public:
 ~JIT();

 static void Register() {
- JITCtor = create;
+ JITCtor = createJIT;
 }

 /// getJITInfo - Return the target JIT information structure.
@@ -80,35 +84,22 @@ public:
 /// create - Create and return a new JIT compiler if there is one available
 /// for the current target. Otherwise, return null.
 ///
- static ExecutionEngine *create(ModuleProvider *MP,
+ static ExecutionEngine *create(Module *M,
 std::string *Err,
 JITMemoryManager *JMM,
 CodeGenOpt::Level OptLevel = CodeGenOpt::Default,
 bool GVsWithCode = true,
 CodeModel::Model CMM = CodeModel::Default) {
- return ExecutionEngine::createJIT(MP, Err, JMM, OptLevel, GVsWithCode,
+ return ExecutionEngine::createJIT(M, Err, JMM, OptLevel, GVsWithCode,
 CMM);
 }

- virtual void addModuleProvider(ModuleProvider *MP);
+ virtual void addModule(Module *M);

- /// removeModuleProvider - Remove a ModuleProvider from the list of modules.
- /// Relases the Module from the ModuleProvider, materializing it in the
- /// process, and returns the materialized Module.
- virtual Module *removeModuleProvider(ModuleProvider *MP,
- std::string *ErrInfo = 0);
-
- /// deleteModuleProvider - Remove a ModuleProvider from the list of modules,
- /// and deletes the ModuleProvider and owned Module. Avoids materializing
- /// the underlying module.
- virtual void deleteModuleProvider(ModuleProvider *P,std::string *ErrInfo = 0);
-
- /// materializeFunction - make sure the given function is fully read. If the
- /// module is corrupt, this returns true and fills in the optional string with
- /// information about the problem. If successful, this returns false.
- ///
- bool materializeFunction(Function *F, std::string *ErrInfo = 0);
+ /// removeModule - Remove a Module from the list of modules. Returns true if
+ /// M is found.
+ virtual bool removeModule(Module *M);

 /// runFunction - Start execution with the specified function and arguments.
/// @@ -177,14 +168,21 @@ public: /// selectTarget - Pick a target either via -march or by guessing the native /// arch. Add any CPU features specified via -mcpu or -mattr. - static TargetMachine *selectTarget(ModuleProvider *MP, std::string *Err); + static TargetMachine *selectTarget(Module *M, + StringRef MArch, + StringRef MCPU, + const SmallVectorImpl<std::string>& MAttrs, + std::string *Err); - static ExecutionEngine *createJIT(ModuleProvider *MP, + static ExecutionEngine *createJIT(Module *M, std::string *ErrorStr, JITMemoryManager *JMM, CodeGenOpt::Level OptLevel, bool GVsWithCode, - CodeModel::Model CMM); + CodeModel::Model CMM, + StringRef MArch, + StringRef MCPU, + const SmallVectorImpl<std::string>& MAttrs); // Run the JIT on F and return information about the generated code void runJITOnFunction(Function *F, MachineCodeInfo *MCI = 0); diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp index c1051a9..946351b 100644 --- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp @@ -522,7 +522,11 @@ JITDwarfEmitter::EmitCommonEHFrame(const Function* Personality) const { JCE->emitInt64(((intptr_t)Jit.getPointerToGlobal(Personality))); } - JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4); + // LSDA encoding: This must match the encoding used in EmitEHFrame () + if (PointerSize == 4) + JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4); + else + JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8); JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4); } else { JCE->emitULEB128Bytes(1); diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp index 4dc119d..57c4375 100644 --- a/lib/ExecutionEngine/JIT/JITEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp @@ -37,6 +37,7 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MutexGuard.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" @@ -57,13 +58,12 @@ using namespace llvm; STATISTIC(NumBytes, "Number of bytes of machine code compiled"); STATISTIC(NumRelos, "Number of relocations applied"); STATISTIC(NumRetries, "Number of retries with more memory"); -static JIT *TheJIT = 0; // A declaration may stop being a declaration once it's fully read from bitcode. // This function returns true if F is fully read and is still a declaration. static bool isNonGhostDeclaration(const Function *F) { - return F->isDeclaration() && !F->hasNotBeenReadFromBitcode(); + return F->isDeclaration() && !F->isMaterializable(); } //===----------------------------------------------------------------------===// @@ -109,9 +109,13 @@ namespace { /// particular GlobalVariable so that we can reuse them if necessary. GlobalToIndirectSymMapTy GlobalToIndirectSymMap; + /// Instance of the JIT this ResolverState serves. 
+ JIT *TheJIT; + public: - JITResolverState() : FunctionToLazyStubMap(this), - FunctionToCallSitesMap(this) {} + JITResolverState(JIT *jit) : FunctionToLazyStubMap(this), + FunctionToCallSitesMap(this), + TheJIT(jit) {} FunctionToLazyStubMapTy& getFunctionToLazyStubMap( const MutexGuard& locked) { @@ -227,18 +231,13 @@ namespace { JITEmitter &JE; - static JITResolver *TheJITResolver; - public: - explicit JITResolver(JIT &jit, JITEmitter &je) : nextGOTIndex(0), JE(je) { - TheJIT = &jit; + /// Instance of JIT corresponding to this Resolver. + JIT *TheJIT; + public: + explicit JITResolver(JIT &jit, JITEmitter &je) + : state(&jit), nextGOTIndex(0), JE(je), TheJIT(&jit) { LazyResolverFn = jit.getJITInfo().getLazyResolverFunction(JITCompilerFn); - assert(TheJITResolver == 0 && "Multiple JIT resolvers?"); - TheJITResolver = this; - } - - ~JITResolver() { - TheJITResolver = 0; } /// getLazyFunctionStubIfAvailable - This returns a pointer to a function's @@ -273,6 +272,44 @@ namespace { static void *JITCompilerFn(void *Stub); }; + class StubToResolverMapTy { + /// Map a stub address to a specific instance of a JITResolver so that + /// lazily-compiled functions can find the right resolver to use. + /// + /// Guarded by Lock. + std::map<void*, JITResolver*> Map; + + /// Guards Map from concurrent accesses. + mutable sys::Mutex Lock; + + public: + /// Registers a Stub to be resolved by Resolver. + void RegisterStubResolver(void *Stub, JITResolver *Resolver) { + MutexGuard guard(Lock); + Map.insert(std::make_pair(Stub, Resolver)); + } + /// Unregisters the Stub when it's invalidated. + void UnregisterStubResolver(void *Stub) { + MutexGuard guard(Lock); + Map.erase(Stub); + } + /// Returns the JITResolver instance that owns the Stub. + JITResolver *getResolverFromStub(void *Stub) const { + MutexGuard guard(Lock); + // The address given to us for the stub may not be exactly right, it might + // be a little bit after the stub. As such, use upper_bound to find it. + // This is the same trick as in LookupFunctionFromCallSite from + // JITResolverState. + std::map<void*, JITResolver*>::const_iterator I = Map.upper_bound(Stub); + assert(I != Map.begin() && "This is not a known stub!"); + --I; + return I->second; + } + }; + /// This needs to be static so that a lazy call stub can access it with no + /// context except the address of the stub. + ManagedStatic<StubToResolverMapTy> StubToResolverMap; + /// JITEmitter - The JIT implementation of the MachineCodeEmitter, which is /// used to output functions to memory for execution. class JITEmitter : public JITCodeEmitter { @@ -371,10 +408,13 @@ namespace { DILocation PrevDLT; + /// Instance of the JIT + JIT *TheJIT; + public: JITEmitter(JIT &jit, JITMemoryManager *JMM, TargetMachine &TM) : SizeEstimate(0), Resolver(jit, *this), MMI(0), CurFn(0), - EmittedFunctions(this), PrevDLT(NULL) { + EmittedFunctions(this), PrevDLT(NULL), TheJIT(&jit) { MemMgr = JMM ? JMM : JITMemoryManager::CreateDefaultMemManager(); if (jit.getJITInfo().needsGOT()) { MemMgr->AllocateGOT(); @@ -495,8 +535,6 @@ namespace { }; } -JITResolver *JITResolver::TheJITResolver = 0; - void CallSiteValueMapConfig::onDelete(JITResolverState *JRS, Function *F) { JRS->EraseAllCallSitesPrelocked(F); } @@ -551,6 +589,10 @@ void *JITResolver::getLazyFunctionStub(Function *F) { DEBUG(dbgs() << "JIT: Lazy stub emitted at [" << Stub << "] for function '" << F->getName() << "'\n"); + // Register this JITResolver as the one corresponding to this call site so + // JITCompilerFn will be able to find it. 
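The registration just below pairs with StubToResolverMapTy::getResolverFromStub() above, whose upper_bound trick deserves a standalone statement: the address handed to the resolver may point a few bytes past the stub's start, so the entry wanted is the one with the greatest key less than or equal to that address. A self-contained sketch, with names of our choosing:

  #include <map>
  #include <cassert>

  template <typename T>
  T lookupByAddress(const std::map<void*, T> &M, void *Addr) {
    // upper_bound yields the first key strictly greater than Addr; stepping
    // back one entry gives the greatest key <= Addr, i.e. the owning stub.
    typename std::map<void*, T>::const_iterator I = M.upper_bound(Addr);
    assert(I != M.begin() && "Addr is below every registered stub!");
    --I;
    return I->second;
  }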
+ StubToResolverMap->RegisterStubResolver(Stub, this); + // Finally, keep track of the stub-to-Function mapping so that the // JITCompilerFn knows which function to compile! state.AddCallSite(locked, Stub, F); @@ -637,6 +679,9 @@ void JITResolver::getRelocatableGVs(SmallVectorImpl<GlobalValue*> &GVs, GlobalValue *JITResolver::invalidateStub(void *Stub) { MutexGuard locked(TheJIT->lock); + // Remove the stub from the StubToResolverMap. + StubToResolverMap->UnregisterStubResolver(Stub); + GlobalToIndirectSymMapTy &GM = state.getGlobalToIndirectSymMap(locked); // Look up the cheap way first, to see if it's a function stub we are @@ -671,7 +716,8 @@ GlobalValue *JITResolver::invalidateStub(void *Stub) { /// been entered. It looks up which function this stub corresponds to, compiles /// it if necessary, then returns the resultant function pointer. void *JITResolver::JITCompilerFn(void *Stub) { - JITResolver &JR = *TheJITResolver; + JITResolver *JR = StubToResolverMap->getResolverFromStub(Stub); + assert(JR && "Unable to find the corresponding JITResolver to the call site"); Function* F = 0; void* ActualPtr = 0; @@ -680,24 +726,24 @@ void *JITResolver::JITCompilerFn(void *Stub) { // Only lock for getting the Function. The call getPointerToFunction made // in this function might trigger function materializing, which requires // JIT lock to be unlocked. - MutexGuard locked(TheJIT->lock); + MutexGuard locked(JR->TheJIT->lock); // The address given to us for the stub may not be exactly right, it might // be a little bit after the stub. As such, use upper_bound to find it. pair<void*, Function*> I = - JR.state.LookupFunctionFromCallSite(locked, Stub); + JR->state.LookupFunctionFromCallSite(locked, Stub); F = I.second; ActualPtr = I.first; } // If we have already code generated the function, just return the address. - void *Result = TheJIT->getPointerToGlobalIfAvailable(F); + void *Result = JR->TheJIT->getPointerToGlobalIfAvailable(F); if (!Result) { // Otherwise we don't have it, do lazy compilation now. // If lazy compilation is disabled, emit a useful error message and abort. - if (!TheJIT->isCompilingLazily()) { + if (!JR->TheJIT->isCompilingLazily()) { llvm_report_error("LLVM JIT requested to do lazy compilation of function '" + F->getName() + "' when lazy compiles are disabled!"); } @@ -706,11 +752,11 @@ void *JITResolver::JITCompilerFn(void *Stub) { << "' In stub ptr = " << Stub << " actual ptr = " << ActualPtr << "\n"); - Result = TheJIT->getPointerToFunction(F); + Result = JR->TheJIT->getPointerToFunction(F); } // Reacquire the lock to update the GOT map. - MutexGuard locked(TheJIT->lock); + MutexGuard locked(JR->TheJIT->lock); // We might like to remove the call site from the CallSiteToFunction map, but // we can't do that! Multiple threads could be stuck, waiting to acquire the @@ -725,8 +771,8 @@ void *JITResolver::JITCompilerFn(void *Stub) { // if they see it still using the stub address. 
// Note: this is done so the Resolver doesn't have to manage GOT memory
 // Do this without allocating map space if the target isn't using a GOT
- if(JR.revGOTMap.find(Stub) != JR.revGOTMap.end())
- JR.revGOTMap[Result] = JR.revGOTMap[Stub];
+ if(JR->revGOTMap.find(Stub) != JR->revGOTMap.end())
+ JR->revGOTMap[Result] = JR->revGOTMap[Stub];

 return Result;
}
@@ -839,7 +885,7 @@ static unsigned GetConstantPoolSizeInBytes(MachineConstantPool *MCP,
 return Size;
}

-static unsigned GetJumpTableSizeInBytes(MachineJumpTableInfo *MJTI) {
+static unsigned GetJumpTableSizeInBytes(MachineJumpTableInfo *MJTI, JIT *jit) {
 const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
 if (JT.empty()) return 0;
@@ -847,9 +893,7 @@
 for (unsigned i = 0, e = JT.size(); i != e; ++i)
 NumEntries += JT[i].MBBs.size();

- unsigned EntrySize = MJTI->getEntrySize();
-
- return NumEntries * EntrySize;
+ return NumEntries * MJTI->getEntrySize(*jit->getTargetData());
}

static uintptr_t RoundUpToAlign(uintptr_t Size, unsigned Alignment) {
@@ -1017,7 +1061,6 @@ void JITEmitter::startFunction(MachineFunction &F) {
 if (MemMgr->NeedsExactSize()) {
 DEBUG(dbgs() << "JIT: ExactSize\n");
 const TargetInstrInfo* TII = F.getTarget().getInstrInfo();
- MachineJumpTableInfo *MJTI = F.getJumpTableInfo();
 MachineConstantPool *MCP = F.getConstantPool();

 // Ensure the constant pool/jump table info is at least 4-byte aligned.
@@ -1029,11 +1072,14 @@
 // Add the constant pool size
 ActualSize += GetConstantPoolSizeInBytes(MCP, TheJIT->getTargetData());

- // Add the aligment of the jump table info
- ActualSize = RoundUpToAlign(ActualSize, MJTI->getAlignment());
+ if (MachineJumpTableInfo *MJTI = F.getJumpTableInfo()) {
+ // Add the alignment of the jump table info
+ ActualSize = RoundUpToAlign(ActualSize,
+ MJTI->getEntryAlignment(*TheJIT->getTargetData()));

- // Add the jump table size
- ActualSize += GetJumpTableSizeInBytes(MJTI);
+ // Add the jump table size
+ ActualSize += GetJumpTableSizeInBytes(MJTI, TheJIT);
+ }

 // Add the alignment for the function
 ActualSize = RoundUpToAlign(ActualSize,
@@ -1062,7 +1108,8 @@ void JITEmitter::startFunction(MachineFunction &F) {
 emitAlignment(16);

 emitConstantPool(F.getConstantPool());
- initJumpTableInfo(F.getJumpTableInfo());
+ if (MachineJumpTableInfo *MJTI = F.getJumpTableInfo())
+ initJumpTableInfo(MJTI);

 // About to start emitting the machine code for the function.
 emitAlignment(std::max(F.getFunction()->getAlignment(), 8U));
@@ -1084,7 +1131,8 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
 return true;
 }

- emitJumpTableInfo(F.getJumpTableInfo());
+ if (MachineJumpTableInfo *MJTI = F.getJumpTableInfo())
+ emitJumpTableInfo(MJTI);

 // FnStart is the start of the text, not the start of the constant pool and
 // other per-function data.
@@ -1404,13 +1452,14 @@ void JITEmitter::initJumpTableInfo(MachineJumpTableInfo *MJTI) {
 for (unsigned i = 0, e = JT.size(); i != e; ++i)
 NumEntries += JT[i].MBBs.size();

- unsigned EntrySize = MJTI->getEntrySize();
+ unsigned EntrySize = MJTI->getEntrySize(*TheJIT->getTargetData());

 // Just allocate space for all the jump tables now. We will fix up the actual
 // MBB entries in the tables after we emit the code for each block, since then
 // we will know the final locations of the MBBs in memory.
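A worked example of the sizing just computed, under assumed numbers: two jump tables with 4 and 6 targets on a 64-bit target using pointer-sized EK_BlockAddress entries give

  unsigned NumEntries = 4 + 6;               // targets across both tables
  unsigned EntrySize = 8;                    // getEntrySize(TD) == sizeof(void*) here
  unsigned Bytes = NumEntries * EntrySize;   // == 80, handed to allocateSpace()
                                             // after aligning to getEntryAlignment(TD)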
JumpTable = MJTI;
- JumpTableBase = allocateSpace(NumEntries * EntrySize, MJTI->getAlignment());
+ JumpTableBase = allocateSpace(NumEntries * EntrySize,
+ MJTI->getEntryAlignment(*TheJIT->getTargetData()));
}

void JITEmitter::emitJumpTableInfo(MachineJumpTableInfo *MJTI) {
@@ -1420,8 +1469,32 @@
 const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
 if (JT.empty() || JumpTableBase == 0) return;

- if (TargetMachine::getRelocationModel() == Reloc::PIC_) {
- assert(MJTI->getEntrySize() == 4 && "Cross JIT'ing?");
+
+ switch (MJTI->getEntryKind()) {
+ case MachineJumpTableInfo::EK_BlockAddress: {
+ // EK_BlockAddress - Each entry is a plain address of a block, e.g.:
+ // .word LBB123
+ assert(MJTI->getEntrySize(*TheJIT->getTargetData()) == sizeof(void*) &&
+ "Cross JIT'ing?");
+
+ // For each jump table, map each target in the jump table to the address of
+ // an emitted MachineBasicBlock.
+ intptr_t *SlotPtr = (intptr_t*)JumpTableBase;
+
+ for (unsigned i = 0, e = JT.size(); i != e; ++i) {
+ const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs;
+ // Store the address of the basic block for this jump table slot in the
+ // memory we allocated for the jump table in 'initJumpTableInfo'
+ for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi)
+ *SlotPtr++ = getMachineBasicBlockAddress(MBBs[mi]);
+ }
+ break;
+ }
+
+ case MachineJumpTableInfo::EK_Custom32:
+ case MachineJumpTableInfo::EK_GPRel32BlockAddress:
+ case MachineJumpTableInfo::EK_LabelDifference32: {
+ assert(MJTI->getEntrySize(*TheJIT->getTargetData()) == 4 && "Cross JIT'ing?");
 // For each jump table, place the offset from the beginning of the table
 // to the target address.
 int *SlotPtr = (int*)JumpTableBase;
@@ -1433,23 +1506,12 @@
 uintptr_t Base = (uintptr_t)SlotPtr;
 for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi) {
 uintptr_t MBBAddr = getMachineBasicBlockAddress(MBBs[mi]);
+ /// FIXME: Use EntryKind instead of magic "getPICJumpTableEntry" hook.
 *SlotPtr++ = TheJIT->getJITInfo().getPICJumpTableEntry(MBBAddr, Base);
 }
 }
- } else {
- assert(MJTI->getEntrySize() == sizeof(void*) && "Cross JIT'ing?");
-
- // For each jump table, map each target in the jump table to the address of
- // an emitted MachineBasicBlock.
- intptr_t *SlotPtr = (intptr_t*)JumpTableBase; - - for (unsigned i = 0, e = JT.size(); i != e; ++i) { - const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs; - // Store the address of the basic block for this jump table slot in the - // memory we allocated for the jump table in 'initJumpTableInfo' - for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi) - *SlotPtr++ = getMachineBasicBlockAddress(MBBs[mi]); - } + break; + } } } @@ -1505,9 +1567,9 @@ uintptr_t JITEmitter::getJumpTableEntryAddress(unsigned Index) const { const std::vector<MachineJumpTableEntry> &JT = JumpTable->getJumpTables(); assert(Index < JT.size() && "Invalid jump table index!"); - unsigned Offset = 0; - unsigned EntrySize = JumpTable->getEntrySize(); + unsigned EntrySize = JumpTable->getEntrySize(*TheJIT->getTargetData()); + unsigned Offset = 0; for (unsigned i = 0; i < Index; ++i) Offset += JT[i].MBBs.size(); @@ -1536,19 +1598,6 @@ JITCodeEmitter *JIT::createEmitter(JIT &jit, JITMemoryManager *JMM, return new JITEmitter(jit, JMM, tm); } -// getPointerToNamedFunction - This function is used as a global wrapper to -// JIT::getPointerToNamedFunction for the purpose of resolving symbols when -// bugpoint is debugging the JIT. In that scenario, we are loading an .so and -// need to resolve function(s) that are being mis-codegenerated, so we need to -// resolve their addresses at runtime, and this is the way to do it. -extern "C" { - void *getPointerToNamedFunction(const char *Name) { - if (Function *F = TheJIT->FindFunctionNamed(Name)) - return TheJIT->getPointerToFunction(F); - return TheJIT->getPointerToNamedFunction(Name); - } -} - // getPointerToFunctionOrStub - If the specified function has been // code-gen'd, return a pointer to the function. If not, compile it, or use // a stub to implement lazy compilation if available. diff --git a/lib/ExecutionEngine/JIT/Makefile b/lib/ExecutionEngine/JIT/Makefile index 1c93c06..aafa3d9 100644 --- a/lib/ExecutionEngine/JIT/Makefile +++ b/lib/ExecutionEngine/JIT/Makefile @@ -9,7 +9,6 @@ LEVEL = ../../.. LIBRARYNAME = LLVMJIT -CXXFLAGS = -fno-rtti # Get the $(ARCH) setting include $(LEVEL)/Makefile.config diff --git a/lib/ExecutionEngine/JIT/TargetSelect.cpp b/lib/ExecutionEngine/JIT/TargetSelect.cpp index 8bed33b..3349c33 100644 --- a/lib/ExecutionEngine/JIT/TargetSelect.cpp +++ b/lib/ExecutionEngine/JIT/TargetSelect.cpp @@ -15,7 +15,6 @@ #include "JIT.h" #include "llvm/Module.h" -#include "llvm/ModuleProvider.h" #include "llvm/ADT/Triple.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" @@ -25,28 +24,14 @@ #include "llvm/Target/TargetRegistry.h" using namespace llvm; -static cl::opt<std::string> -MArch("march", - cl::desc("Architecture to generate assembly for (see --version)")); - -static cl::opt<std::string> -MCPU("mcpu", - cl::desc("Target a specific cpu type (-mcpu=help for details)"), - cl::value_desc("cpu-name"), - cl::init("")); - -static cl::list<std::string> -MAttrs("mattr", - cl::CommaSeparated, - cl::desc("Target specific attributes (-mattr=help for details)"), - cl::value_desc("a1,+a2,-a3,...")); - /// selectTarget - Pick a target either via -march or by guessing the native /// arch. Add any CPU features specified via -mcpu or -mattr. 
-TargetMachine *JIT::selectTarget(ModuleProvider *MP, std::string *ErrorStr) { - Module &Mod = *MP->getModule(); - - Triple TheTriple(Mod.getTargetTriple()); +TargetMachine *JIT::selectTarget(Module *Mod, + StringRef MArch, + StringRef MCPU, + const SmallVectorImpl<std::string>& MAttrs, + std::string *ErrorStr) { + Triple TheTriple(Mod->getTargetTriple()); if (TheTriple.getTriple().empty()) TheTriple.setTriple(sys::getHostTriple()); diff --git a/lib/ExecutionEngine/Makefile b/lib/ExecutionEngine/Makefile index 2387b0e..e0e050e 100644 --- a/lib/ExecutionEngine/Makefile +++ b/lib/ExecutionEngine/Makefile @@ -9,6 +9,5 @@ LEVEL = ../.. LIBRARYNAME = LLVMExecutionEngine PARALLEL_DIRS = Interpreter JIT -CXXFLAGS = -fno-rtti include $(LEVEL)/Makefile.common diff --git a/lib/Linker/LinkArchives.cpp b/lib/Linker/LinkArchives.cpp index 365ec05..2c4ed7f 100644 --- a/lib/Linker/LinkArchives.cpp +++ b/lib/Linker/LinkArchives.cpp @@ -14,7 +14,6 @@ #include "llvm/Linker.h" #include "llvm/Module.h" -#include "llvm/ModuleProvider.h" #include "llvm/ADT/SetOperations.h" #include "llvm/Bitcode/Archive.h" #include "llvm/Config/config.h" @@ -139,8 +138,10 @@ Linker::LinkInArchive(const sys::Path &Filename, bool &is_native) { do { CurrentlyUndefinedSymbols = UndefinedSymbols; - // Find the modules we need to link into the target module - std::set<ModuleProvider*> Modules; + // Find the modules we need to link into the target module. Note that arch + // keeps ownership of these modules and may return the same Module* from a + // subsequent call. + std::set<Module*> Modules; if (!arch->findModulesDefiningSymbols(UndefinedSymbols, Modules, &ErrMsg)) return error("Cannot find symbols in '" + Filename.str() + "': " + ErrMsg); @@ -156,19 +157,17 @@ Linker::LinkInArchive(const sys::Path &Filename, bool &is_native) { NotDefinedByArchive.insert(UndefinedSymbols.begin(), UndefinedSymbols.end()); - // Loop over all the ModuleProviders that we got back from the archive - for (std::set<ModuleProvider*>::iterator I=Modules.begin(), E=Modules.end(); + // Loop over all the Modules that we got back from the archive + for (std::set<Module*>::iterator I=Modules.begin(), E=Modules.end(); I != E; ++I) { // Get the module we must link in. 
std::string moduleErrorMsg; - std::auto_ptr<Module> AutoModule((*I)->releaseModule( &moduleErrorMsg )); - if (!moduleErrorMsg.empty()) - return error("Could not load a module: " + moduleErrorMsg); - - Module* aModule = AutoModule.get(); - + Module* aModule = *I; if (aModule != NULL) { + if (aModule->MaterializeAll(&moduleErrorMsg)) + return error("Could not load a module: " + moduleErrorMsg); + verbose(" Linking in module: " + aModule->getModuleIdentifier()); // Link it in diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index e2cd47a..7f441b0 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -392,9 +392,20 @@ static Value *RemapOperand(const Value *In, assert(!isa<GlobalValue>(CPV) && "Unmapped global?"); llvm_unreachable("Unknown type of derived type constant value!"); } - } else if (isa<MDNode>(In) || isa<MDString>(In)) { - Result = const_cast<Value*>(In); - } else if (isa<InlineAsm>(In)) { + } else if (const MDNode *MD = dyn_cast<MDNode>(In)) { + if (MD->isFunctionLocal()) { + SmallVector<Value*, 4> Elts; + for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) { + if (MD->getOperand(i)) + Elts.push_back(RemapOperand(MD->getOperand(i), ValueMap)); + else + Elts.push_back(NULL); + } + Result = MDNode::get(In->getContext(), Elts.data(), MD->getNumOperands()); + } else { + Result = const_cast<Value*>(In); + } + } else if (isa<MDString>(In) || isa<InlineAsm>(In) || isa<Instruction>(In)) { Result = const_cast<Value*>(In); } diff --git a/lib/Linker/Makefile b/lib/Linker/Makefile index 2179fd2..19e646b 100644 --- a/lib/Linker/Makefile +++ b/lib/Linker/Makefile @@ -10,7 +10,6 @@ LEVEL = ../.. LIBRARYNAME = LLVMLinker BUILD_ARCHIVE := 1 -CXXFLAGS = -fno-rtti include $(LEVEL)/Makefile.common diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp index 547f904..f3f063f 100644 --- a/lib/MC/MCAsmInfo.cpp +++ b/lib/MC/MCAsmInfo.cpp @@ -22,11 +22,10 @@ MCAsmInfo::MCAsmInfo() { HasSubsectionsViaSymbols = false; HasMachoZeroFillDirective = false; HasStaticCtorDtorReferenceInStaticMode = false; - NeedsSet = false; MaxInstLength = 4; PCSymbol = "$"; SeparatorChar = ';'; - CommentColumn = 60; + CommentColumn = 40; CommentString = "#"; GlobalPrefix = ""; PrivateGlobalPrefix = "."; @@ -48,12 +47,11 @@ MCAsmInfo::MCAsmInfo() { AlignDirective = "\t.align\t"; AlignmentIsInBytes = true; TextAlignFillValue = 0; - JumpTableDirective = 0; - PICJumpTableDirective = 0; + GPRel32Directive = 0; GlobalDirective = "\t.globl\t"; - SetDirective = 0; + HasSetDirective = true; HasLCOMMDirective = false; - COMMDirectiveTakesAlignment = true; + COMMDirectiveAlignmentIsInBytes = true; HasDotTypeDotSizeDirective = true; HasSingleParameterDotFile = true; HasNoDeadStrip = false; diff --git a/lib/MC/MCAsmInfoCOFF.cpp b/lib/MC/MCAsmInfoCOFF.cpp index e395acd..9130493 100644 --- a/lib/MC/MCAsmInfoCOFF.cpp +++ b/lib/MC/MCAsmInfoCOFF.cpp @@ -18,14 +18,13 @@ using namespace llvm; MCAsmInfoCOFF::MCAsmInfoCOFF() { GlobalPrefix = "_"; + COMMDirectiveAlignmentIsInBytes = false; HasLCOMMDirective = true; - COMMDirectiveTakesAlignment = false; HasDotTypeDotSizeDirective = false; HasSingleParameterDotFile = false; PrivateGlobalPrefix = "L"; // Prefix for private global symbols WeakRefDirective = "\t.weak\t"; - LinkOnceDirective = "\t.linkonce same_size\n"; - SetDirective = "\t.set\t"; + LinkOnceDirective = "\t.linkonce discard\n"; // Doesn't support visibility: HiddenVisibilityAttr = ProtectedVisibilityAttr = MCSA_Invalid; @@ -37,4 +36,3 @@ MCAsmInfoCOFF::MCAsmInfoCOFF() { 
SupportsDebugInformation = true; DwarfSectionOffsetDirective = "\t.secrel32\t"; } - diff --git a/lib/MC/MCAsmInfoDarwin.cpp b/lib/MC/MCAsmInfoDarwin.cpp index 9902f50..da865ad 100644 --- a/lib/MC/MCAsmInfoDarwin.cpp +++ b/lib/MC/MCAsmInfoDarwin.cpp @@ -21,12 +21,12 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() { GlobalPrefix = "_"; PrivateGlobalPrefix = "L"; LinkerPrivateGlobalPrefix = "l"; - NeedsSet = true; AllowQuotesInName = true; HasSingleParameterDotFile = false; HasSubsectionsViaSymbols = true; AlignmentIsInBytes = false; + COMMDirectiveAlignmentIsInBytes = false; InlineAsmStart = " InlineAsm Start"; InlineAsmEnd = " InlineAsm End"; @@ -36,7 +36,6 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() { ZeroDirective = "\t.space\t"; // ".space N" emits N zeros. HasMachoZeroFillDirective = true; // Uses .zerofill HasStaticCtorDtorReferenceInStaticMode = true; - SetDirective = "\t.set"; HiddenVisibilityAttr = MCSA_PrivateExtern; // Doesn't support protected visibility. diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp index bf39239..6add1b4 100644 --- a/lib/MC/MCAsmStreamer.cpp +++ b/lib/MC/MCAsmStreamer.cpp @@ -29,25 +29,32 @@ namespace { class MCAsmStreamer : public MCStreamer { formatted_raw_ostream &OS; const MCAsmInfo &MAI; - bool IsLittleEndian, IsVerboseAsm; MCInstPrinter *InstPrinter; MCCodeEmitter *Emitter; SmallString<128> CommentToEmit; raw_svector_ostream CommentStream; + + unsigned IsLittleEndian : 1; + unsigned IsVerboseAsm : 1; + unsigned ShowInst : 1; + public: MCAsmStreamer(MCContext &Context, formatted_raw_ostream &os, const MCAsmInfo &mai, bool isLittleEndian, bool isVerboseAsm, MCInstPrinter *printer, - MCCodeEmitter *emitter) - : MCStreamer(Context), OS(os), MAI(mai), IsLittleEndian(isLittleEndian), - IsVerboseAsm(isVerboseAsm), InstPrinter(printer), Emitter(emitter), - CommentStream(CommentToEmit) {} + MCCodeEmitter *emitter, bool showInst) + : MCStreamer(Context), OS(os), MAI(mai), InstPrinter(printer), + Emitter(emitter), CommentStream(CommentToEmit), + IsLittleEndian(isLittleEndian), IsVerboseAsm(isVerboseAsm), + ShowInst(showInst) { + if (InstPrinter && IsVerboseAsm) + InstPrinter->setCommentStream(CommentStream); + } ~MCAsmStreamer() {} bool isLittleEndian() const { return IsLittleEndian; } - - + inline void EmitEOL() { // If we don't have any comments, just emit a \n. if (!IsVerboseAsm) { @@ -57,13 +64,20 @@ public: EmitCommentsAndEOL(); } void EmitCommentsAndEOL(); - + + /// isVerboseAsm - Return true if this streamer supports verbose assembly at + /// all. + virtual bool isVerboseAsm() const { return IsVerboseAsm; } + /// AddComment - Add a comment that can be emitted to the generated .s /// file if applicable as a QoI issue to make the output of the compiler /// more readable. This only affects the MCAsmStreamer, and only when /// verbose assembly output is enabled. virtual void AddComment(const Twine &T); - + + /// AddEncodingComment - Add a comment showing the encoding of an instruction. + virtual void AddEncodingComment(const MCInst &Inst); + /// GetCommentOS - Return a raw_ostream that comments can be written to. /// Unlike AddComment, you are required to terminate comments with \n if you /// use this method. @@ -72,12 +86,12 @@ public: return nulls(); // Discard comments unless in verbose asm mode. return CommentStream; } - + /// AddBlankLine - Emit a blank line to a .s file to pretty it up. 
virtual void AddBlankLine() { EmitEOL(); } - + /// @name MCStreamer Interface /// @{ @@ -93,6 +107,7 @@ public: virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue); + virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value); virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment); @@ -109,6 +124,8 @@ public: virtual void EmitValue(const MCExpr *Value, unsigned Size,unsigned AddrSpace); virtual void EmitIntValue(uint64_t Value, unsigned Size, unsigned AddrSpace); + virtual void EmitGPRel32Value(const MCExpr *Value); + virtual void EmitFill(uint64_t NumBytes, uint8_t FillValue, unsigned AddrSpace); @@ -119,9 +136,12 @@ public: virtual void EmitValueToOffset(const MCExpr *Offset, unsigned char Value = 0); - - virtual void EmitInstruction(const MCInst &Inst); + virtual void EmitFileDirective(StringRef Filename); + virtual void EmitDwarfFileDirective(unsigned FileNo, StringRef Filename); + + virtual void EmitInstruction(const MCInst &Inst); + virtual void Finish(); /// @} @@ -178,12 +198,6 @@ static inline int64_t truncateToSize(int64_t Value, unsigned Bytes) { return Value & ((uint64_t) (int64_t) -1 >> (64 - Bytes * 8)); } -static inline const MCExpr *truncateToSize(const MCExpr *Value, - unsigned Bytes) { - // FIXME: Do we really need this routine? - return Value; -} - void MCAsmStreamer::SwitchSection(const MCSection *Section) { assert(Section && "Cannot switch to a null section!"); if (Section != CurSection) { @@ -226,7 +240,29 @@ void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) { switch (Attribute) { case MCSA_Invalid: assert(0 && "Invalid symbol attribute"); - case MCSA_Global: OS << MAI.getGlobalDirective(); break; // .globl + case MCSA_ELF_TypeFunction: /// .type _foo, STT_FUNC # aka @function + case MCSA_ELF_TypeIndFunction: /// .type _foo, STT_GNU_IFUNC + case MCSA_ELF_TypeObject: /// .type _foo, STT_OBJECT # aka @object + case MCSA_ELF_TypeTLS: /// .type _foo, STT_TLS # aka @tls_object + case MCSA_ELF_TypeCommon: /// .type _foo, STT_COMMON # aka @common + case MCSA_ELF_TypeNoType: /// .type _foo, STT_NOTYPE # aka @notype + assert(MAI.hasDotTypeDotSizeDirective() && "Symbol Attr not supported"); + OS << "\t.type\t" << *Symbol << ',' + << ((MAI.getCommentString()[0] != '@') ? 
'@' : '%'); + switch (Attribute) { + default: assert(0 && "Unknown ELF .type"); + case MCSA_ELF_TypeFunction: OS << "function"; break; + case MCSA_ELF_TypeIndFunction: OS << "gnu_indirect_function"; break; + case MCSA_ELF_TypeObject: OS << "object"; break; + case MCSA_ELF_TypeTLS: OS << "tls_object"; break; + case MCSA_ELF_TypeCommon: OS << "common"; break; + case MCSA_ELF_TypeNoType: OS << "no_type"; break; + } + EmitEOL(); + return; + case MCSA_Global: // .globl/.global + OS << MAI.getGlobalDirective(); + break; case MCSA_Hidden: OS << ".hidden "; break; case MCSA_IndirectSymbol: OS << ".indirect_symbol "; break; case MCSA_Internal: OS << ".internal "; break; @@ -251,11 +287,16 @@ void MCAsmStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) { EmitEOL(); } +void MCAsmStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) { + assert(MAI.hasDotTypeDotSizeDirective()); + OS << "\t.size\t" << *Symbol << ", " << *Value << '\n'; +} + void MCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) { OS << "\t.comm\t" << *Symbol << ',' << Size; - if (ByteAlignment != 0 && MAI.getCOMMDirectiveTakesAlignment()) { - if (MAI.getAlignmentIsInBytes()) + if (ByteAlignment != 0) { + if (MAI.getCOMMDirectiveAlignmentIsInBytes()) OS << ',' << ByteAlignment; else OS << ',' << Log2_32(ByteAlignment); @@ -292,6 +333,40 @@ void MCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, static inline char toOctal(int X) { return (X&7)+'0'; } +static void PrintQuotedString(StringRef Data, raw_ostream &OS) { + OS << '"'; + + for (unsigned i = 0, e = Data.size(); i != e; ++i) { + unsigned char C = Data[i]; + if (C == '"' || C == '\\') { + OS << '\\' << (char)C; + continue; + } + + if (isprint((unsigned char)C)) { + OS << (char)C; + continue; + } + + switch (C) { + case '\b': OS << "\\b"; break; + case '\f': OS << "\\f"; break; + case '\n': OS << "\\n"; break; + case '\r': OS << "\\r"; break; + case '\t': OS << "\\t"; break; + default: + OS << '\\'; + OS << toOctal(C >> 6); + OS << toOctal(C >> 3); + OS << toOctal(C >> 0); + break; + } + } + + OS << '"'; +} + + void MCAsmStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) { assert(CurSection && "Cannot emit contents before setting section!"); if (Data.empty()) return; @@ -312,34 +387,8 @@ void MCAsmStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) { OS << MAI.getAsciiDirective(); } - OS << " \""; - for (unsigned i = 0, e = Data.size(); i != e; ++i) { - unsigned char C = Data[i]; - if (C == '"' || C == '\\') { - OS << '\\' << (char)C; - continue; - } - - if (isprint((unsigned char)C)) { - OS << (char)C; - continue; - } - - switch (C) { - case '\b': OS << "\\b"; break; - case '\f': OS << "\\f"; break; - case '\n': OS << "\\n"; break; - case '\r': OS << "\\r"; break; - case '\t': OS << "\\t"; break; - default: - OS << '\\'; - OS << toOctal(C >> 6); - OS << toOctal(C >> 3); - OS << toOctal(C >> 0); - break; - } - } - OS << '"'; + OS << ' '; + PrintQuotedString(Data, OS); EmitEOL(); } @@ -386,10 +435,17 @@ void MCAsmStreamer::EmitValue(const MCExpr *Value, unsigned Size, } assert(Directive && "Invalid size for machine code value!"); - OS << Directive << *truncateToSize(Value, Size); + OS << Directive << *Value; + EmitEOL(); +} + +void MCAsmStreamer::EmitGPRel32Value(const MCExpr *Value) { + assert(MAI.getGPRel32Directive() != 0); + OS << MAI.getGPRel32Directive() << *Value; EmitEOL(); } + /// EmitFill - Emit NumBytes bytes worth of the value specified by /// FillValue. 
This implements directives such as '.space'. void MCAsmStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue, @@ -464,49 +520,133 @@ void MCAsmStreamer::EmitValueToOffset(const MCExpr *Offset, EmitEOL(); } -void MCAsmStreamer::EmitInstruction(const MCInst &Inst) { - assert(CurSection && "Cannot emit contents before setting section!"); - // If we have an AsmPrinter, use that to print. - if (InstPrinter) { - InstPrinter->printInst(&Inst); - EmitEOL(); +void MCAsmStreamer::EmitFileDirective(StringRef Filename) { + assert(MAI.hasSingleParameterDotFile()); + OS << "\t.file\t"; + PrintQuotedString(Filename, OS); + EmitEOL(); +} - // Show the encoding if we have a code emitter. - if (Emitter) { - SmallString<256> Code; - raw_svector_ostream VecOS(Code); - Emitter->EncodeInstruction(Inst, VecOS); - VecOS.flush(); - - OS.indent(20); - OS << " # encoding: ["; - for (unsigned i = 0, e = Code.size(); i != e; ++i) { - if (i) - OS << ','; - OS << format("%#04x", uint8_t(Code[i])); +void MCAsmStreamer::EmitDwarfFileDirective(unsigned FileNo, StringRef Filename){ + OS << "\t.file\t" << FileNo << ' '; + PrintQuotedString(Filename, OS); + EmitEOL(); +} + +void MCAsmStreamer::AddEncodingComment(const MCInst &Inst) { + raw_ostream &OS = GetCommentOS(); + SmallString<256> Code; + SmallVector<MCFixup, 4> Fixups; + raw_svector_ostream VecOS(Code); + Emitter->EncodeInstruction(Inst, VecOS, Fixups); + VecOS.flush(); + + // If we are showing fixups, create symbolic markers in the encoded + // representation. We do this by making a per-bit map to the fixup item index, + // then trying to display it as nicely as possible. + SmallVector<uint8_t, 64> FixupMap; + FixupMap.resize(Code.size() * 8); + for (unsigned i = 0, e = Code.size() * 8; i != e; ++i) + FixupMap[i] = 0; + + for (unsigned i = 0, e = Fixups.size(); i != e; ++i) { + MCFixup &F = Fixups[i]; + const MCFixupKindInfo &Info = Emitter->getFixupKindInfo(F.getKind()); + for (unsigned j = 0; j != Info.TargetSize; ++j) { + unsigned Index = F.getOffset() * 8 + Info.TargetOffset + j; + assert(Index < Code.size() * 8 && "Invalid offset in fixup!"); + FixupMap[Index] = 1 + i; + } + } + + OS << "encoding: ["; + for (unsigned i = 0, e = Code.size(); i != e; ++i) { + if (i) + OS << ','; + + // See if all bits are the same map entry. + uint8_t MapEntry = FixupMap[i * 8 + 0]; + for (unsigned j = 1; j != 8; ++j) { + if (FixupMap[i * 8 + j] == MapEntry) + continue; + + MapEntry = uint8_t(~0U); + break; + } + + if (MapEntry != uint8_t(~0U)) { + if (MapEntry == 0) { + OS << format("0x%02x", uint8_t(Code[i])); + } else { + assert(Code[i] == 0 && "Encoder wrote into fixed up bit!"); + OS << char('A' + MapEntry - 1); + } + } else { + // Otherwise, write out in binary. + OS << "0b"; + for (unsigned j = 8; j--;) { + unsigned Bit = (Code[i] >> j) & 1; + if (uint8_t MapEntry = FixupMap[i * 8 + j]) { + assert(Bit == 0 && "Encoder wrote into fixed up bit!"); + OS << char('A' + MapEntry - 1); + } else + OS << Bit; } - OS << "]\n"; } + } + OS << "]\n"; - return; + for (unsigned i = 0, e = Fixups.size(); i != e; ++i) { + MCFixup &F = Fixups[i]; + const MCFixupKindInfo &Info = Emitter->getFixupKindInfo(F.getKind()); + OS << " fixup " << char('A' + i) << " - " << "offset: " << F.getOffset() + << ", value: " << *F.getValue() << ", kind: " << Info.Name << "\n"; } +} + +void MCAsmStreamer::EmitInstruction(const MCInst &Inst) { + assert(CurSection && "Cannot emit contents before setting section!"); - // Otherwise fall back to a structural printing for now. 
Eventually we should - // always have access to the target specific printer. - Inst.print(OS, &MAI); + // Show the encoding in a comment if we have a code emitter. + if (Emitter) + AddEncodingComment(Inst); + + // Show the MCInst if enabled. + if (ShowInst) { + raw_ostream &OS = GetCommentOS(); + OS << "<MCInst #" << Inst.getOpcode(); + + StringRef InstName; + if (InstPrinter) + InstName = InstPrinter->getOpcodeName(Inst.getOpcode()); + if (!InstName.empty()) + OS << ' ' << InstName; + + for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i) { + OS << "\n "; + Inst.getOperand(i).print(OS, &MAI); + } + OS << ">\n"; + } + + // If we have an AsmPrinter, use that to print, otherwise dump the MCInst. + if (InstPrinter) + InstPrinter->printInst(&Inst); + else + Inst.print(OS, &MAI); EmitEOL(); } void MCAsmStreamer::Finish() { OS.flush(); } - + MCStreamer *llvm::createAsmStreamer(MCContext &Context, formatted_raw_ostream &OS, const MCAsmInfo &MAI, bool isLittleEndian, bool isVerboseAsm, MCInstPrinter *IP, - MCCodeEmitter *CE) { + MCCodeEmitter *CE, bool ShowInst) { return new MCAsmStreamer(Context, OS, MAI, isLittleEndian, isVerboseAsm, - IP, CE); + IP, CE, ShowInst); } diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index f0f5a47..653fbf2 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -13,14 +13,20 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" -#include "llvm/Target/TargetMachOWriterInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachO.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Debug.h" + +// FIXME: Gross. +#include "../Target/X86/X86FixupKinds.h" + #include <vector> using namespace llvm; @@ -45,6 +51,30 @@ static bool isVirtualSection(const MCSection &Section) { return (Type == MCSectionMachO::S_ZEROFILL); } +static unsigned getFixupKindLog2Size(MCFixupKind Kind) { + switch (Kind) { + default: llvm_unreachable("invalid fixup kind!"); + case X86::reloc_pcrel_1byte: + case FK_Data_1: return 0; + case FK_Data_2: return 1; + case X86::reloc_pcrel_4byte: + case X86::reloc_riprel_4byte: + case FK_Data_4: return 2; + case FK_Data_8: return 3; + } +} + +static bool isFixupKindPCRel(MCFixupKind Kind) { + switch (Kind) { + default: + return false; + case X86::reloc_pcrel_1byte: + case X86::reloc_pcrel_4byte: + case X86::reloc_riprel_4byte: + return true; + } +} + class MachObjectWriter { // See <mach-o/loader.h>. enum { @@ -203,9 +233,9 @@ public: Write32(Header_Magic32); // FIXME: Support cputype. - Write32(TargetMachOWriterInfo::HDR_CPU_TYPE_I386); + Write32(MachO::CPUTypeI386); // FIXME: Support cpusubtype. - Write32(TargetMachOWriterInfo::HDR_CPU_SUBTYPE_I386_ALL); + Write32(MachO::CPUSubType_I386_ALL); Write32(HFT_Object); Write32(NumLoadCommands); // Object files have a single load command, the // segment. @@ -266,11 +296,15 @@ public: Write32(SD.getSize()); // size Write32(FileOffset); + unsigned Flags = Section.getTypeAndAttributes(); + if (SD.hasInstructions()) + Flags |= MCSectionMachO::S_ATTR_SOME_INSTRUCTIONS; + assert(isPowerOf2_32(SD.getAlignment()) && "Invalid alignment!"); Write32(Log2_32(SD.getAlignment())); Write32(NumRelocations ? 
RelocationsStart : 0); Write32(NumRelocations); - Write32(Section.getTypeAndAttributes()); + Write32(Flags); Write32(0); // reserved1 Write32(Section.getStubSize()); // reserved2 @@ -398,12 +432,12 @@ public: uint32_t Word0; uint32_t Word1; }; - void ComputeScatteredRelocationInfo(MCAssembler &Asm, - MCSectionData::Fixup &Fixup, + void ComputeScatteredRelocationInfo(MCAssembler &Asm, MCFragment &Fragment, + MCAsmFixup &Fixup, const MCValue &Target, DenseMap<const MCSymbol*,MCSymbolData*> &SymbolMap, std::vector<MachRelocationEntry> &Relocs) { - uint32_t Address = Fixup.Fragment->getOffset() + Fixup.Offset; + uint32_t Address = Fragment.getOffset() + Fixup.Offset; unsigned IsPCRel = 0; unsigned Type = RIT_Vanilla; @@ -420,11 +454,14 @@ public: Value2 = SD->getFragment()->getAddress() + SD->getOffset(); } - unsigned Log2Size = Log2_32(Fixup.Size); - assert((1U << Log2Size) == Fixup.Size && "Invalid fixup size!"); + unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind); // The value which goes in the fixup is current value of the expression. Fixup.FixedValue = Value - Value2 + Target.getConstant(); + if (isFixupKindPCRel(Fixup.Kind)) { + Fixup.FixedValue -= Address + (1 << Log2Size); + IsPCRel = 1; + } MachRelocationEntry MRE; MRE.Word0 = ((Address << 0) | @@ -449,8 +486,8 @@ public: } } - void ComputeRelocationInfo(MCAssembler &Asm, - MCSectionData::Fixup &Fixup, + void ComputeRelocationInfo(MCAssembler &Asm, MCDataFragment &Fragment, + MCAsmFixup &Fixup, DenseMap<const MCSymbol*,MCSymbolData*> &SymbolMap, std::vector<MachRelocationEntry> &Relocs) { MCValue Target; @@ -462,11 +499,11 @@ public: if (Target.getSymB() || (Target.getSymA() && !Target.getSymA()->isUndefined() && Target.getConstant())) - return ComputeScatteredRelocationInfo(Asm, Fixup, Target, + return ComputeScatteredRelocationInfo(Asm, Fragment, Fixup, Target, SymbolMap, Relocs); // See <reloc.h>. - uint32_t Address = Fixup.Fragment->getOffset() + Fixup.Offset; + uint32_t Address = Fragment.getOffset() + Fixup.Offset; uint32_t Value = 0; unsigned Index = 0; unsigned IsPCRel = 0; @@ -475,6 +512,8 @@ public: if (Target.isAbsolute()) { // constant // SymbolNum of 0 indicates the absolute section. + // + // FIXME: When is this generated? Type = RIT_Vanilla; Value = 0; llvm_unreachable("FIXME: Not yet implemented!"); @@ -491,10 +530,11 @@ public: // // FIXME: O(N) Index = 1; - for (MCAssembler::iterator it = Asm.begin(), - ie = Asm.end(); it != ie; ++it, ++Index) + MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); + for (; it != ie; ++it, ++Index) if (&*it == SD->getFragment()->getParent()) break; + assert(it != ie && "Unable to find section index!"); Value = SD->getFragment()->getAddress() + SD->getOffset(); } @@ -504,8 +544,12 @@ public: // The value which goes in the fixup is current value of the expression. Fixup.FixedValue = Value + Target.getConstant(); - unsigned Log2Size = Log2_32(Fixup.Size); - assert((1U << Log2Size) == Fixup.Size && "Invalid fixup size!"); + unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind); + + if (isFixupKindPCRel(Fixup.Kind)) { + Fixup.FixedValue -= Address + (1<<Log2Size); + IsPCRel = 1; + } // struct relocation_info (8 bytes) MachRelocationEntry MRE; @@ -742,7 +786,7 @@ public: SD.getAddress() + SD.getFileSize()); } - // The section data is passed to 4 bytes. + // The section data is padded to 4 bytes. // // FIXME: Is this machine dependent? unsigned SectionDataPadding = OffsetToAlignment(SectionDataFileSize, 4); @@ -757,22 +801,25 @@ public: // ... and then the section headers. 
// // We also compute the section relocations while we do this. Note that - // compute relocation info will also update the fixup to have the correct - // value; this will be overwrite the appropriate data in the fragment when - // it is written. + // computing relocation info will also update the fixup to have the correct + // value; this will overwrite the appropriate data in the fragment when it + // is written. std::vector<MachRelocationEntry> RelocInfos; uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize; - for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; - ++it) { + for (MCAssembler::iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) { MCSectionData &SD = *it; // The assembler writes relocations in the reverse order they were seen. // // FIXME: It is probably more complicated than this. unsigned NumRelocsStart = RelocInfos.size(); - for (unsigned i = 0, e = SD.fixup_size(); i != e; ++i) - ComputeRelocationInfo(Asm, SD.getFixups()[e - i - 1], SymbolMap, - RelocInfos); + for (MCSectionData::reverse_iterator it2 = SD.rbegin(), + ie2 = SD.rend(); it2 != ie2; ++it2) + if (MCDataFragment *DF = dyn_cast<MCDataFragment>(&*it2)) + for (unsigned i = 0, e = DF->fixup_size(); i != e; ++i) + ComputeRelocationInfo(Asm, *DF, DF->getFixups()[e - i - 1], + SymbolMap, RelocInfos); unsigned NumRelocs = RelocInfos.size() - NumRelocsStart; uint64_t SectionStart = SectionDataStart + SD.getAddress(); @@ -867,6 +914,16 @@ public: OS << StringTable.str(); } } + + void ApplyFixup(const MCAsmFixup &Fixup, MCDataFragment &DF) { + unsigned Size = 1 << getFixupKindLog2Size(Fixup.Kind); + + // FIXME: Endianness assumption. + assert(Fixup.Offset + Size <= DF.getContents().size() && + "Invalid fixup offset!"); + for (unsigned i = 0; i != Size; ++i) + DF.getContents()[Fixup.Offset + i] = uint8_t(Fixup.FixedValue >> (i * 8)); + } }; /* *** */ @@ -901,34 +958,12 @@ MCSectionData::MCSectionData(const MCSection &_Section, MCAssembler *A) Address(~UINT64_C(0)), Size(~UINT64_C(0)), FileSize(~UINT64_C(0)), - LastFixupLookup(~0) + HasInstructions(false) { if (A) A->getSectionList().push_back(this); } -const MCSectionData::Fixup * -MCSectionData::LookupFixup(const MCFragment *Fragment, uint64_t Offset) const { - // Use a one level cache to turn the common case of accessing the fixups in - // order into O(1) instead of O(N). - unsigned i = LastFixupLookup, Count = Fixups.size(), End = Fixups.size(); - if (i >= End) - i = 0; - while (Count--) { - const Fixup &F = Fixups[i]; - if (F.Fragment == Fragment && F.Offset == Offset) { - LastFixupLookup = i; - return &F; - } - - ++i; - if (i == End) - i = 0; - } - - return 0; -} - /* *** */ MCSymbolData::MCSymbolData() : Symbol(0) {} @@ -975,31 +1010,10 @@ void MCAssembler::LayoutSection(MCSectionData &SD) { } case MCFragment::FT_Data: + case MCFragment::FT_Fill: F.setFileSize(F.getMaxFileSize()); break; - case MCFragment::FT_Fill: { - MCFillFragment &FF = cast<MCFillFragment>(F); - - F.setFileSize(F.getMaxFileSize()); - - MCValue Target; - if (!FF.getValue().EvaluateAsRelocatable(Target)) - llvm_report_error("expected relocatable expression"); - - // If the fill value is constant, thats it. - if (Target.isAbsolute()) - break; - - // Otherwise, add fixups for the values. 
- for (uint64_t i = 0, e = FF.getCount(); i != e; ++i) { - MCSectionData::Fixup Fix(F, i * FF.getValueSize(), - FF.getValue(),FF.getValueSize()); - SD.getFixups().push_back(Fix); - } - break; - } - case MCFragment::FT_Org: { MCOrgFragment &OF = cast<MCOrgFragment>(F); @@ -1082,39 +1096,30 @@ static void WriteFileData(raw_ostream &OS, const MCFragment &F, break; } - case MCFragment::FT_Data: + case MCFragment::FT_Data: { + MCDataFragment &DF = cast<MCDataFragment>(F); + + // Apply the fixups. + // + // FIXME: Move elsewhere. + for (MCDataFragment::const_fixup_iterator it = DF.fixup_begin(), + ie = DF.fixup_end(); it != ie; ++it) + MOW.ApplyFixup(*it, DF); + OS << cast<MCDataFragment>(F).getContents().str(); break; + } case MCFragment::FT_Fill: { MCFillFragment &FF = cast<MCFillFragment>(F); - - int64_t Value = 0; - - MCValue Target; - if (!FF.getValue().EvaluateAsRelocatable(Target)) - llvm_report_error("expected relocatable expression"); - - if (Target.isAbsolute()) - Value = Target.getConstant(); for (uint64_t i = 0, e = FF.getCount(); i != e; ++i) { - if (!Target.isAbsolute()) { - // Find the fixup. - // - // FIXME: Find a better way to write in the fixes. - const MCSectionData::Fixup *Fixup = - F.getParent()->LookupFixup(&F, i * FF.getValueSize()); - assert(Fixup && "Missing fixup for fill value!"); - Value = Fixup->FixedValue; - } - switch (FF.getValueSize()) { default: assert(0 && "Invalid size!"); - case 1: MOW.Write8 (uint8_t (Value)); break; - case 2: MOW.Write16(uint16_t(Value)); break; - case 4: MOW.Write32(uint32_t(Value)); break; - case 8: MOW.Write64(uint64_t(Value)); break; + case 1: MOW.Write8 (uint8_t (FF.getValue())); break; + case 2: MOW.Write16(uint16_t(FF.getValue())); break; + case 4: MOW.Write32(uint32_t(FF.getValue())); break; + case 8: MOW.Write64(uint64_t(FF.getValue())); break; } } break; @@ -1162,6 +1167,10 @@ static void WriteFileData(raw_ostream &OS, const MCSectionData &SD, } void MCAssembler::Finish() { + DEBUG_WITH_TYPE("mc-dump", { + llvm::errs() << "assembler backend - pre-layout\n--\n"; + dump(); }); + // Layout the concrete sections and fragments. uint64_t Address = 0; MCSectionData *Prev = 0; @@ -1200,9 +1209,149 @@ void MCAssembler::Finish() { Address += SD.getSize(); } + DEBUG_WITH_TYPE("mc-dump", { + llvm::errs() << "assembler backend - post-layout\n--\n"; + dump(); }); + // Write the object file. 
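A minimal standalone sketch of the fixed-value rule the relocation code above applies (illustrative names, not LLVM API): for a pc-relative fixup the encoded field holds the distance from the end of the fixup field itself, which is why both ComputeRelocationInfo paths subtract Address + (1 << Log2Size).

    #include <cstdint>

    // Resolve a fixup to the raw value that gets written into section data.
    // TargetAddr + Addend mirrors Value + Target.getConstant() above; FixupAddr
    // is the fixup's address within the section (fragment offset + offset).
    uint64_t resolveFixup(uint64_t TargetAddr, int64_t Addend,
                          uint64_t FixupAddr, unsigned Log2Size, bool IsPCRel) {
      uint64_t Value = TargetAddr + Addend;
      if (IsPCRel)
        Value -= FixupAddr + (1ull << Log2Size); // pc points past the field
      return Value;
    }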
MachObjectWriter MOW(OS); MOW.WriteObject(*this); OS.flush(); } + + +// Debugging methods + +namespace llvm { + +raw_ostream &operator<<(raw_ostream &OS, const MCAsmFixup &AF) { + OS << "<MCAsmFixup" << " Offset:" << AF.Offset << " Value:" << *AF.Value + << " Kind:" << AF.Kind << ">"; + return OS; +} + +} + +void MCFragment::dump() { + raw_ostream &OS = llvm::errs(); + + OS << "<MCFragment " << (void*) this << " Offset:" << Offset + << " FileSize:" << FileSize; + + OS << ">"; +} + +void MCAlignFragment::dump() { + raw_ostream &OS = llvm::errs(); + + OS << "<MCAlignFragment "; + this->MCFragment::dump(); + OS << "\n "; + OS << " Alignment:" << getAlignment() + << " Value:" << getValue() << " ValueSize:" << getValueSize() + << " MaxBytesToEmit:" << getMaxBytesToEmit() << ">"; +} + +void MCDataFragment::dump() { + raw_ostream &OS = llvm::errs(); + + OS << "<MCDataFragment "; + this->MCFragment::dump(); + OS << "\n "; + OS << " Contents:["; + for (unsigned i = 0, e = getContents().size(); i != e; ++i) { + if (i) OS << ","; + OS << hexdigit((Contents[i] >> 4) & 0xF) << hexdigit(Contents[i] & 0xF); + } + OS << "] (" << getContents().size() << " bytes)"; + + if (!getFixups().empty()) { + OS << ",\n "; + OS << " Fixups:["; + for (fixup_iterator it = fixup_begin(), ie = fixup_end(); it != ie; ++it) { + if (it != fixup_begin()) OS << ",\n "; + OS << *it; + } + OS << "]"; + } + + OS << ">"; +} + +void MCFillFragment::dump() { + raw_ostream &OS = llvm::errs(); + + OS << "<MCFillFragment "; + this->MCFragment::dump(); + OS << "\n "; + OS << " Value:" << getValue() << " ValueSize:" << getValueSize() + << " Count:" << getCount() << ">"; +} + +void MCOrgFragment::dump() { + raw_ostream &OS = llvm::errs(); + + OS << "<MCOrgFragment "; + this->MCFragment::dump(); + OS << "\n "; + OS << " Offset:" << getOffset() << " Value:" << getValue() << ">"; +} + +void MCZeroFillFragment::dump() { + raw_ostream &OS = llvm::errs(); + + OS << "<MCZeroFillFragment "; + this->MCFragment::dump(); + OS << "\n "; + OS << " Size:" << getSize() << " Alignment:" << getAlignment() << ">"; +} + +void MCSectionData::dump() { + raw_ostream &OS = llvm::errs(); + + OS << "<MCSectionData"; + OS << " Alignment:" << getAlignment() << " Address:" << Address + << " Size:" << Size << " FileSize:" << FileSize + << " Fragments:["; + for (iterator it = begin(), ie = end(); it != ie; ++it) { + if (it != begin()) OS << ",\n "; + it->dump(); + } + OS << "]>"; +} + +void MCSymbolData::dump() { + raw_ostream &OS = llvm::errs(); + + OS << "<MCSymbolData Symbol:" << getSymbol() + << " Fragment:" << getFragment() << " Offset:" << getOffset() + << " Flags:" << getFlags() << " Index:" << getIndex(); + if (isCommon()) + OS << " (common, size:" << getCommonSize() + << " align: " << getCommonAlignment() << ")"; + if (isExternal()) + OS << " (external)"; + if (isPrivateExtern()) + OS << " (private extern)"; + OS << ">"; +} + +void MCAssembler::dump() { + raw_ostream &OS = llvm::errs(); + + OS << "<MCAssembler\n"; + OS << " Sections:["; + for (iterator it = begin(), ie = end(); it != ie; ++it) { + if (it != begin()) OS << ",\n "; + it->dump(); + } + OS << "],\n"; + OS << " Symbols:["; + + for (symbol_iterator it = symbol_begin(), ie = symbol_end(); it != ie; ++it) { + if (it != symbol_begin()) OS << ",\n "; + it->dump(); + } + OS << "]>\n"; +} diff --git a/lib/MC/MCCodeEmitter.cpp b/lib/MC/MCCodeEmitter.cpp index c122763..accb06c 100644 --- a/lib/MC/MCCodeEmitter.cpp +++ b/lib/MC/MCCodeEmitter.cpp @@ -16,3 +16,15 @@ MCCodeEmitter::MCCodeEmitter() { 
MCCodeEmitter::~MCCodeEmitter() { } + +const MCFixupKindInfo &MCCodeEmitter::getFixupKindInfo(MCFixupKind Kind) const { + static const MCFixupKindInfo Builtins[] = { + { "FK_Data_1", 0, 8 }, + { "FK_Data_2", 0, 16 }, + { "FK_Data_4", 0, 32 }, + { "FK_Data_8", 0, 64 } + }; + + assert(Kind <= 3 && "Unknown fixup kind"); + return Builtins[Kind]; +} diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp index 1ee1b1b..e419043 100644 --- a/lib/MC/MCExpr.cpp +++ b/lib/MC/MCExpr.cpp @@ -17,6 +17,8 @@ using namespace llvm; void MCExpr::print(raw_ostream &OS) const { switch (getKind()) { + case MCExpr::Target: + return cast<MCTargetExpr>(this)->PrintImpl(OS); case MCExpr::Constant: OS << cast<MCConstantExpr>(*this).getValue(); return; @@ -131,6 +133,7 @@ const MCSymbolRefExpr *MCSymbolRefExpr::Create(StringRef Name, MCContext &Ctx) { return Create(Ctx.GetOrCreateSymbol(Name), Ctx); } +void MCTargetExpr::Anchor() {} /* *** */ @@ -168,6 +171,9 @@ static bool EvaluateSymbolicAdd(const MCValue &LHS, const MCSymbol *RHS_A, bool MCExpr::EvaluateAsRelocatable(MCValue &Res) const { switch (getKind()) { + case Target: + return cast<MCTargetExpr>(this)->EvaluateAsRelocatableImpl(Res); + case Constant: Res = MCValue::get(cast<MCConstantExpr>(this)->getValue()); return true; @@ -246,8 +252,8 @@ bool MCExpr::EvaluateAsRelocatable(MCValue &Res) const { } // FIXME: We need target hooks for the evaluation. It may be limited in - // width, and gas defines the result of comparisons differently from Apple - // as (the result is sign extended). + // width, and gas defines the result of comparisons and right shifts + // differently from Apple as. int64_t LHS = LHSValue.getConstant(), RHS = RHSValue.getConstant(); int64_t Result = 0; switch (ABE->getOpcode()) { diff --git a/lib/MC/MCInstPrinter.cpp b/lib/MC/MCInstPrinter.cpp index e90c03c..92a7154 100644 --- a/lib/MC/MCInstPrinter.cpp +++ b/lib/MC/MCInstPrinter.cpp @@ -8,7 +8,14 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCInstPrinter.h" +#include "llvm/ADT/StringRef.h" using namespace llvm; MCInstPrinter::~MCInstPrinter() { } + +/// getOpcodeName - Return the name of the specified opcode enum (e.g. +/// "MOV32ri") or empty if we can't resolve it. 
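+/// The base implementation below has no opcode table, so it simply returns the
+/// empty string; target instruction printers are expected to override it,
+/// typically backed by their TableGen'd instruction name table.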
+StringRef MCInstPrinter::getOpcodeName(unsigned Opcode) const {
+  return "";
+}
diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp
index e559c65..0c9627d 100644
--- a/lib/MC/MCMachOStreamer.cpp
+++ b/lib/MC/MCMachOStreamer.cpp
@@ -46,13 +46,9 @@ class MCMachOStreamer : public MCStreamer {
 private:
   MCAssembler Assembler;
-
   MCCodeEmitter *Emitter;
-
   MCSectionData *CurSectionData;
-
   DenseMap<const MCSection*, MCSectionData*> SectionMap;
-
   DenseMap<const MCSymbol*, MCSymbolData*> SymbolMap;

 private:
@@ -91,6 +87,7 @@ public:
   const MCExpr *AddValueSymbols(const MCExpr *Value) {
     switch (Value->getKind()) {
+    case MCExpr::Target: assert(0 && "Can't handle target exprs yet!");
     case MCExpr::Constant:
       break;
@@ -124,6 +121,9 @@ public:
   virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue);
   virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
                                 unsigned ByteAlignment);
+  virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
+    assert(0 && "macho doesn't support this directive");
+  }
   virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {
     assert(0 && "macho doesn't support this directive");
   }
@@ -131,11 +131,22 @@ public:
                             unsigned Size = 0, unsigned ByteAlignment = 0);
   virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
   virtual void EmitValue(const MCExpr *Value, unsigned Size,unsigned AddrSpace);
+  virtual void EmitGPRel32Value(const MCExpr *Value) {
+    assert(0 && "macho doesn't support this directive");
+  }
   virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
                                     unsigned ValueSize = 1,
                                     unsigned MaxBytesToEmit = 0);
   virtual void EmitValueToOffset(const MCExpr *Offset,
                                  unsigned char Value = 0);
+
+  virtual void EmitFileDirective(StringRef Filename) {
+    errs() << "FIXME: MCMachoStreamer:EmitFileDirective not implemented\n";
+  }
+  virtual void EmitDwarfFileDirective(unsigned FileNo, StringRef Filename) {
+    errs() << "FIXME: MCMachoStreamer:EmitDwarfFileDirective not implemented\n";
+  }
+
   virtual void EmitInstruction(const MCInst &Inst);
   virtual void Finish();
@@ -220,6 +231,12 @@ void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
   // defined.
   switch (Attribute) {
   case MCSA_Invalid:
+  case MCSA_ELF_TypeFunction:
+  case MCSA_ELF_TypeIndFunction:
+  case MCSA_ELF_TypeObject:
+  case MCSA_ELF_TypeTLS:
+  case MCSA_ELF_TypeCommon:
+  case MCSA_ELF_TypeNoType:
   case MCSA_IndirectSymbol:
   case MCSA_Hidden:
   case MCSA_Internal:
@@ -316,7 +333,24 @@ void MCMachOStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
 void MCMachOStreamer::EmitValue(const MCExpr *Value, unsigned Size,
                                 unsigned AddrSpace) {
-  new MCFillFragment(*AddValueSymbols(Value), Size, 1, CurSectionData);
+  // Assume the front-end will have evaluated absolute expressions already, so
+  // just create data + fixup.
+  MCDataFragment *DF = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
+  if (!DF)
+    DF = new MCDataFragment(CurSectionData);
+
+  // Avoid fixups when possible.
+  int64_t AbsValue;
+  if (Value->EvaluateAsAbsolute(AbsValue)) {
+    // FIXME: Endianness assumption.
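+    // (Least significant byte first: e.g. AbsValue = 0x12345678 with Size = 4
+    // appends 0x78, 0x56, 0x34, 0x12 to the fragment.)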
+ for (unsigned i = 0; i != Size; ++i) + DF->getContents().push_back(uint8_t(AbsValue >> (i * 8))); + } else { + DF->getFixups().push_back(MCAsmFixup(DF->getContents().size(), + *AddValueSymbols(Value), + MCFixup::getKindForSize(Size))); + DF->getContents().resize(DF->getContents().size() + Size, 0); + } } void MCMachOStreamer::EmitValueToAlignment(unsigned ByteAlignment, @@ -346,13 +380,25 @@ void MCMachOStreamer::EmitInstruction(const MCInst &Inst) { if (!Emitter) llvm_unreachable("no code emitter available!"); - // FIXME: Emitting an instruction should cause S_ATTR_SOME_INSTRUCTIONS to - // be set for the current section. - // FIXME: Relocations! + CurSectionData->setHasInstructions(true); + + SmallVector<MCFixup, 4> Fixups; SmallString<256> Code; raw_svector_ostream VecOS(Code); - Emitter->EncodeInstruction(Inst, VecOS); - EmitBytes(VecOS.str(), 0); + Emitter->EncodeInstruction(Inst, VecOS, Fixups); + VecOS.flush(); + + // Add the fixups and data. + MCDataFragment *DF = dyn_cast_or_null<MCDataFragment>(getCurrentFragment()); + if (!DF) + DF = new MCDataFragment(CurSectionData); + for (unsigned i = 0, e = Fixups.size(); i != e; ++i) { + MCFixup &F = Fixups[i]; + DF->getFixups().push_back(MCAsmFixup(DF->getContents().size()+F.getOffset(), + *F.getValue(), + F.getKind())); + } + DF->getContents().append(Code.begin(), Code.end()); } void MCMachOStreamer::Finish() { diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp index 7c219b3..46e9ebf 100644 --- a/lib/MC/MCNullStreamer.cpp +++ b/lib/MC/MCNullStreamer.cpp @@ -38,7 +38,7 @@ namespace { virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute){} virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {} - + virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {} virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) {} virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {} @@ -50,7 +50,7 @@ namespace { virtual void EmitValue(const MCExpr *Value, unsigned Size, unsigned AddrSpace) {} - + virtual void EmitGPRel32Value(const MCExpr *Value) {} virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0, unsigned ValueSize = 1, unsigned MaxBytesToEmit = 0) {} @@ -58,6 +58,8 @@ namespace { virtual void EmitValueToOffset(const MCExpr *Offset, unsigned char Value = 0) {} + virtual void EmitFileDirective(StringRef Filename) {} + virtual void EmitDwarfFileDirective(unsigned FileNo,StringRef Filename) {} virtual void EmitInstruction(const MCInst &Inst) {} virtual void Finish() {} diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index dd438b7..51ad5d1 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -325,18 +325,25 @@ bool AsmParser::ParseExpression(const MCExpr *&Res) { /// expr ::= primaryexpr /// bool AsmParser::ParseExpression(const MCExpr *&Res, SMLoc &EndLoc) { + // Parse the expression. Res = 0; - return ParsePrimaryExpr(Res, EndLoc) || - ParseBinOpRHS(1, Res, EndLoc); -} - -bool AsmParser::ParseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) { - if (ParseParenExpr(Res, EndLoc)) + if (ParsePrimaryExpr(Res, EndLoc) || ParseBinOpRHS(1, Res, EndLoc)) return true; + // Try to constant fold it up front, if possible. 
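+  // (For example, "(3+5)*2" evaluates absolutely to 16 and is replaced by a
+  // single MCConstantExpr instead of a tree of binary expressions.)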
+  int64_t Value;
+  if (Res->EvaluateAsAbsolute(Value))
+    Res = MCConstantExpr::Create(Value, getContext());
+
   return false;
 }

+bool AsmParser::ParseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) {
+  Res = 0;
+  return ParseParenExpr(Res, EndLoc) ||
+         ParseBinOpRHS(1, Res, EndLoc);
+}
+
 bool AsmParser::ParseAbsoluteExpression(int64_t &Res) {
   const MCExpr *Expr;
@@ -1709,14 +1716,18 @@ bool AsmParser::ParseDirectiveFile(StringRef, SMLoc DirectiveLoc) {
   if (Lexer.isNot(AsmToken::String))
     return TokError("unexpected token in '.file' directive");

-  StringRef ATTRIBUTE_UNUSED FileName = getTok().getString();
+  StringRef Filename = getTok().getString();
+  Filename = Filename.substr(1, Filename.size()-2);
   Lex();

   if (Lexer.isNot(AsmToken::EndOfStatement))
     return TokError("unexpected token in '.file' directive");

-  // FIXME: Do something with the .file.
-
+  if (FileNumber == -1)
+    Out.EmitFileDirective(Filename);
+  else
+    Out.EmitDwarfFileDirective(FileNumber, Filename);
+
   return false;
 }
diff --git a/lib/MC/MCParser/Makefile b/lib/MC/MCParser/Makefile
index e4eb483..4477757 100644
--- a/lib/MC/MCParser/Makefile
+++ b/lib/MC/MCParser/Makefile
@@ -10,7 +10,6 @@ LEVEL = ../../..
 LIBRARYNAME = LLVMMCParser
 BUILD_ARCHIVE := 1
-CXXFLAGS = -fno-rtti

 include $(LEVEL)/Makefile.common
diff --git a/lib/MC/Makefile b/lib/MC/Makefile
index 371776f..a661fa6 100644
--- a/lib/MC/Makefile
+++ b/lib/MC/Makefile
@@ -11,7 +11,6 @@ LEVEL = ../..
 LIBRARYNAME = LLVMMC
 BUILD_ARCHIVE := 1
 PARALLEL_DIRS := MCParser
-CXXFLAGS = -fno-rtti

 include $(LEVEL)/Makefile.common
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index 9d14684..3bce3f3 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -273,7 +273,7 @@ APInt& APInt::operator-=(const APInt& RHS) {
   return clearUnusedBits();
 }

-/// Multiplies an integer array, x by a a uint64_t integer and places the result
+/// Multiplies an integer array, x, by a uint64_t integer and places the result
 /// into dest.
 /// @returns the carry out of the multiplication.
 /// @brief Multiply a multi-digit APInt by a single digit (64-bit) integer.
@@ -767,8 +767,23 @@ bool APInt::isPowerOf2() const {
 }

 unsigned APInt::countLeadingZerosSlowCase() const {
-  unsigned Count = 0;
-  for (unsigned i = getNumWords(); i > 0u; --i) {
+  // Treat the most significant word differently because it might have
+  // meaningless bits set beyond the precision.
+  unsigned BitsInMSW = BitWidth % APINT_BITS_PER_WORD;
+  integerPart MSWMask;
+  if (BitsInMSW) MSWMask = (integerPart(1) << BitsInMSW) - 1;
+  else {
+    MSWMask = ~integerPart(0);
+    BitsInMSW = APINT_BITS_PER_WORD;
+  }
+
+  unsigned i = getNumWords();
+  integerPart MSW = pVal[i-1] & MSWMask;
+  if (MSW)
+    return CountLeadingZeros_64(MSW) - (APINT_BITS_PER_WORD - BitsInMSW);
+
+  unsigned Count = BitsInMSW;
+  for (--i; i > 0u; --i) {
     if (pVal[i-1] == 0)
       Count += APINT_BITS_PER_WORD;
     else {
@@ -776,10 +791,7 @@ unsigned APInt::countLeadingZerosSlowCase() const {
       break;
     }
   }
-  unsigned remainder = BitWidth % APINT_BITS_PER_WORD;
-  if (remainder)
-    Count -= APINT_BITS_PER_WORD - remainder;
-  return std::min(Count, BitWidth);
+  return Count;
 }

 static unsigned countLeadingOnes_64(uint64_t V, unsigned skip) {
@@ -1754,7 +1766,7 @@ void APInt::divide(const APInt LHS, unsigned lhsWords,
   // First, compose the values into an array of 32-bit words instead of
   // 64-bit words.
This is a necessity of both the "short division" algorithm - // and the the Knuth "classical algorithm" which requires there to be native + // and the Knuth "classical algorithm" which requires there to be native // operations for +, -, and * on an m bit value with an m*2 bit result. We // can't use 64-bit operands here because we don't have native results of // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp index fa692be8..961dc1f 100644 --- a/lib/Support/CommandLine.cpp +++ b/lib/Support/CommandLine.cpp @@ -507,8 +507,9 @@ void cl::ParseCommandLineOptions(int argc, char **argv, // Copy the program name into ProgName, making sure not to overflow it. std::string ProgName = sys::Path(argv[0]).getLast(); - if (ProgName.size() > 79) ProgName.resize(79); - strcpy(ProgramName, ProgName.c_str()); + size_t Len = std::min(ProgName.size(), size_t(79)); + memcpy(ProgramName, ProgName.data(), Len); + ProgramName[Len] = '\0'; ProgramOverview = Overview; bool ErrorParsing = false; diff --git a/lib/Support/ConstantRange.cpp b/lib/Support/ConstantRange.cpp index ddf14e3..2746f7a 100644 --- a/lib/Support/ConstantRange.cpp +++ b/lib/Support/ConstantRange.cpp @@ -540,6 +540,11 @@ ConstantRange::add(const ConstantRange &Other) const { ConstantRange ConstantRange::multiply(const ConstantRange &Other) const { + // TODO: If either operand is a single element and the multiply is known to + // be non-wrapping, round the result min and max value to the appropriate + // multiple of that element. If wrapping is possible, at least adjust the + // range according to the greatest power-of-two factor of the single element. + if (isEmptySet() || Other.isEmptySet()) return ConstantRange(getBitWidth(), /*isFullSet=*/false); if (isFullSet() || Other.isFullSet()) @@ -650,7 +655,12 @@ ConstantRange::lshr(const ConstantRange &Amount) const { /// print - Print out the bounds to a stream... /// void ConstantRange::print(raw_ostream &OS) const { - OS << "[" << Lower << "," << Upper << ")"; + if (isFullSet()) + OS << "full-set"; + else if (isEmptySet()) + OS << "empty-set"; + else + OS << "[" << Lower << "," << Upper << ")"; } /// dump - Allow printing from a debugger easily... diff --git a/lib/Support/FileUtilities.cpp b/lib/Support/FileUtilities.cpp index 21080b6..095395f 100644 --- a/lib/Support/FileUtilities.cpp +++ b/lib/Support/FileUtilities.cpp @@ -13,11 +13,11 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/FileUtilities.h" -#include "llvm/System/Path.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/System/Path.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringExtras.h" #include <cstdlib> #include <cstring> #include <cctype> @@ -139,11 +139,11 @@ static bool CompareNumbers(const char *&F1P, const char *&F2P, Diff = 0; // Both zero. if (Diff > RelTolerance) { if (ErrorMsg) { - *ErrorMsg = "Compared: " + ftostr(V1) + " and " + ftostr(V2) + "\n"; - *ErrorMsg += "abs. diff = " + ftostr(std::abs(V1-V2)) + - " rel.diff = " + ftostr(Diff) + "\n"; - *ErrorMsg += "Out of tolerance: rel/abs: " + ftostr(RelTolerance) + - "/" + ftostr(AbsTolerance); + raw_string_ostream(*ErrorMsg) + << "Compared: " << V1 << " and " << V2 << '\n' + << "abs. 
diff = " << std::abs(V1-V2) << " rel.diff = " << Diff << '\n' + << "Out of tolerance: rel/abs: " << RelTolerance << '/' + << AbsTolerance; } return true; } diff --git a/lib/Support/FormattedStream.cpp b/lib/Support/FormattedStream.cpp index 9ab3666..39b6cb3 100644 --- a/lib/Support/FormattedStream.cpp +++ b/lib/Support/FormattedStream.cpp @@ -59,12 +59,13 @@ void formatted_raw_ostream::ComputeColumn(const char *Ptr, size_t Size) { /// \param MinPad - The minimum space to give after the most recent /// I/O, even if the current column + minpad > newcol. /// -void formatted_raw_ostream::PadToColumn(unsigned NewCol) { +formatted_raw_ostream &formatted_raw_ostream::PadToColumn(unsigned NewCol) { // Figure out what's in the buffer and add it to the column count. ComputeColumn(getBufferStart(), GetNumBytesInBuffer()); // Output spaces until we reach the desired column. indent(std::max(int(NewCol - ColumnScanned), 1)); + return *this; } void formatted_raw_ostream::write_impl(const char *Ptr, size_t Size) { diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp index bdc637a..83c7964 100644 --- a/lib/Support/SourceMgr.cpp +++ b/lib/Support/SourceMgr.cpp @@ -35,7 +35,7 @@ SourceMgr::~SourceMgr() { // Delete the line # cache if allocated. if (LineNoCacheTy *Cache = getCache(LineNoCache)) delete Cache; - + while (!Buffers.empty()) { delete Buffers.back().Buffer; Buffers.pop_back(); @@ -47,7 +47,7 @@ SourceMgr::~SourceMgr() { /// ~0, otherwise it returns the buffer ID of the stacked file. unsigned SourceMgr::AddIncludeFile(const std::string &Filename, SMLoc IncludeLoc) { - + MemoryBuffer *NewBuf = MemoryBuffer::getFile(Filename.c_str()); // If the file didn't exist directly, see if it's in an include path. @@ -55,7 +55,7 @@ unsigned SourceMgr::AddIncludeFile(const std::string &Filename, std::string IncFile = IncludeDirectories[i] + "/" + Filename; NewBuf = MemoryBuffer::getFile(IncFile.c_str()); } - + if (NewBuf == 0) return ~0U; return AddNewSourceBuffer(NewBuf, IncludeLoc); @@ -79,20 +79,20 @@ int SourceMgr::FindBufferContainingLoc(SMLoc Loc) const { unsigned SourceMgr::FindLineNumber(SMLoc Loc, int BufferID) const { if (BufferID == -1) BufferID = FindBufferContainingLoc(Loc); assert(BufferID != -1 && "Invalid Location!"); - + MemoryBuffer *Buff = getBufferInfo(BufferID).Buffer; - + // Count the number of \n's between the start of the file and the specified // location. unsigned LineNo = 1; - + const char *Ptr = Buff->getBufferStart(); // If we have a line number cache, and if the query is to a later point in the // same file, start searching from the last query location. This optimizes // for the case when multiple diagnostics come out of one file in order. if (LineNoCacheTy *Cache = getCache(LineNoCache)) - if (Cache->LastQueryBufferID == BufferID && + if (Cache->LastQueryBufferID == BufferID && Cache->LastQuery <= Loc.getPointer()) { Ptr = Cache->LastQuery; LineNo = Cache->LineNoOfQuery; @@ -102,12 +102,12 @@ unsigned SourceMgr::FindLineNumber(SMLoc Loc, int BufferID) const { // we see. for (; SMLoc::getFromPointer(Ptr) != Loc; ++Ptr) if (*Ptr == '\n') ++LineNo; - - + + // Allocate the line number cache if it doesn't exist. if (LineNoCache == 0) LineNoCache = new LineNoCacheTy(); - + // Update the line # cache. 
LineNoCacheTy &Cache = *getCache(LineNoCache); Cache.LastQueryBufferID = BufferID; @@ -118,12 +118,12 @@ unsigned SourceMgr::FindLineNumber(SMLoc Loc, int BufferID) const { void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc, raw_ostream &OS) const { if (IncludeLoc == SMLoc()) return; // Top of stack. - + int CurBuf = FindBufferContainingLoc(IncludeLoc); assert(CurBuf != -1 && "Invalid or unspecified location!"); PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS); - + OS << "Included from " << getBufferInfo(CurBuf).Buffer->getBufferIdentifier() << ":" << FindLineNumber(IncludeLoc, CurBuf) << ":\n"; @@ -137,12 +137,12 @@ void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc, raw_ostream &OS) const { /// prefixed to the message. SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, const std::string &Msg, const char *Type, bool ShowLine) const { - + // First thing to do: find the current buffer containing the specified // location. int CurBuf = FindBufferContainingLoc(Loc); assert(CurBuf != -1 && "Invalid or unspecified location!"); - + MemoryBuffer *CurMB = getBufferInfo(CurBuf).Buffer; // Scan backward to find the start of the line. @@ -160,7 +160,7 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, const std::string &Msg, ++LineEnd; LineStr = std::string(LineStart, LineEnd); } - + std::string PrintedMsg; if (Type) { PrintedMsg = Type; @@ -173,7 +173,7 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, const std::string &Msg, LineStr, ShowLine); } -void SourceMgr::PrintMessage(SMLoc Loc, const std::string &Msg, +void SourceMgr::PrintMessage(SMLoc Loc, const std::string &Msg, const char *Type, bool ShowLine) const { raw_ostream &OS = errs(); @@ -188,7 +188,7 @@ void SourceMgr::PrintMessage(SMLoc Loc, const std::string &Msg, // SMDiagnostic Implementation //===----------------------------------------------------------------------===// -void SMDiagnostic::Print(const char *ProgName, raw_ostream &S) { +void SMDiagnostic::Print(const char *ProgName, raw_ostream &S) const { if (ProgName && ProgName[0]) S << ProgName << ": "; @@ -197,7 +197,7 @@ void SMDiagnostic::Print(const char *ProgName, raw_ostream &S) { S << "<stdin>"; else S << Filename; - + if (LineNo != -1) { S << ':' << LineNo; if (ColumnNo != -1) @@ -205,12 +205,12 @@ void SMDiagnostic::Print(const char *ProgName, raw_ostream &S) { } S << ": "; } - + S << Message << '\n'; if (LineNo != -1 && ColumnNo != -1 && ShowLine) { S << LineContents << '\n'; - + // Print out spaces/tabs before the caret. for (unsigned i = 0; i != unsigned(ColumnNo); ++i) S << (LineContents[i] == '\t' ? 
'\t' : ' '); diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index 2fec094..5a76184 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -33,6 +33,7 @@ const char *Triple::getArchTypeName(ArchType Kind) { case ppc64: return "powerpc64"; case ppc: return "powerpc"; case sparc: return "sparc"; + case sparcv9: return "sparcv9"; case systemz: return "s390x"; case tce: return "tce"; case thumb: return "thumb"; @@ -61,6 +62,7 @@ const char *Triple::getArchTypePrefix(ArchType Kind) { case ppc64: case ppc: return "ppc"; + case sparcv9: case sparc: return "sparc"; case x86: @@ -127,6 +129,8 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) { return ppc; if (Name == "sparc") return sparc; + if (Name == "sparcv9") + return sparcv9; if (Name == "systemz") return systemz; if (Name == "tce") @@ -250,6 +254,8 @@ void Triple::Parse() const { Arch = mipsel; else if (ArchName == "sparc") Arch = sparc; + else if (ArchName == "sparcv9") + Arch = sparcv9; else if (ArchName == "s390x") Arch = systemz; else if (ArchName == "tce") diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp index 10d7ec0..25c3fbd 100644 --- a/lib/Support/raw_ostream.cpp +++ b/lib/Support/raw_ostream.cpp @@ -20,7 +20,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringExtras.h" +#include <cctype> #include <sys/stat.h> #include <sys/types.h> @@ -209,7 +209,7 @@ raw_ostream &raw_ostream::operator<<(const void *P) { } raw_ostream &raw_ostream::operator<<(double N) { - return this->operator<<(ftostr(N)); + return this->operator<<(format("%e", N)); } @@ -574,12 +574,18 @@ void raw_svector_ostream::resync() { } void raw_svector_ostream::write_impl(const char *Ptr, size_t Size) { - assert(Ptr == OS.end() && OS.size() + Size <= OS.capacity() && - "Invalid write_impl() call!"); - - // We don't need to copy the bytes, just commit the bytes to the - // SmallVector. - OS.set_size(OS.size() + Size); + // If we're writing bytes from the end of the buffer into the smallvector, we + // don't need to copy the bytes, just commit the bytes because they are + // already in the right place. + if (Ptr == OS.end()) { + assert(OS.size() + Size <= OS.capacity() && "Invalid write_impl() call!"); + OS.set_size(OS.size() + Size); + } else { + assert(GetNumBytesInBuffer() == 0 && + "Should be writing from buffer if some bytes in it"); + // Otherwise, do copy the bytes. + OS.append(Ptr, Ptr+Size); + } // Grow the vector if necessary. if (OS.capacity() - OS.size() < 64) diff --git a/lib/System/Makefile b/lib/System/Makefile index d4fd60e..bb013b9 100644 --- a/lib/System/Makefile +++ b/lib/System/Makefile @@ -10,7 +10,7 @@ LEVEL = ../.. LIBRARYNAME = LLVMSystem BUILD_ARCHIVE = 1 - +REQUIRES_RTTI = 1 include $(LEVEL)/Makefile.config ifeq ($(HOST_OS),MingW) diff --git a/lib/System/Unix/Program.inc b/lib/System/Unix/Program.inc index 43c3606..c10498a 100644 --- a/lib/System/Unix/Program.inc +++ b/lib/System/Unix/Program.inc @@ -126,7 +126,7 @@ static void TimeOutHandler(int Sig) { static void SetMemoryLimits (unsigned size) { -#if HAVE_SYS_RESOURCE_H +#if HAVE_SYS_RESOURCE_H && HAVE_GETRLIMIT && HAVE_SETRLIMIT struct rlimit r; __typeof__ (r.rlim_cur) limit = (__typeof__ (r.rlim_cur)) (size) * 1048576; @@ -323,4 +323,9 @@ bool Program::ChangeStdoutToBinary(){ return false; } +bool Program::ChangeStderrToBinary(){ + // Do nothing, as Unix doesn't differentiate between text and binary. 
+ return false; +} + } diff --git a/lib/System/Unix/Signals.inc b/lib/System/Unix/Signals.inc index 676e1e5..c8ec68a 100644 --- a/lib/System/Unix/Signals.inc +++ b/lib/System/Unix/Signals.inc @@ -52,7 +52,16 @@ static const int *const IntSigsEnd = // KillSigs - Signals that are synchronous with the program that will cause it // to die. static const int KillSigs[] = { - SIGILL, SIGTRAP, SIGABRT, SIGFPE, SIGBUS, SIGSEGV, SIGSYS, SIGXCPU, SIGXFSZ + SIGILL, SIGTRAP, SIGABRT, SIGFPE, SIGBUS, SIGSEGV +#ifdef SIGSYS + , SIGSYS +#endif +#ifdef SIGXCPU + , SIGXCPU +#endif +#ifdef SIGXFSZ + , SIGXFSZ +#endif #ifdef SIGEMT , SIGEMT #endif diff --git a/lib/System/Win32/Program.inc b/lib/System/Win32/Program.inc index a69826f..a3b40d0 100644 --- a/lib/System/Win32/Program.inc +++ b/lib/System/Win32/Program.inc @@ -379,4 +379,9 @@ bool Program::ChangeStdoutToBinary(){ return result == -1; } +bool Program::ChangeStderrToBinary(){ + int result = _setmode( _fileno(stderr), _O_BINARY ); + return result == -1; +} + } diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index 21445ad..b08f942 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -23,9 +23,7 @@ namespace llvm { class ARMBaseTargetMachine; class FunctionPass; -class MachineCodeEmitter; class JITCodeEmitter; -class ObjectCodeEmitter; class formatted_raw_ostream; // Enums corresponding to ARM condition codes @@ -95,12 +93,8 @@ inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) { FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM, CodeGenOpt::Level OptLevel); -FunctionPass *createARMCodeEmitterPass(ARMBaseTargetMachine &TM, - MachineCodeEmitter &MCE); FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM, JITCodeEmitter &JCE); -FunctionPass *createARMObjectCodeEmitterPass(ARMBaseTargetMachine &TM, - ObjectCodeEmitter &OCE); FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false); FunctionPass *createARMExpandPseudoPass(); diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index fd46a4a..6fe7c2c 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -450,10 +450,10 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { switch (Opc) { default: llvm_unreachable("Unknown or unset size field for instr!"); - case TargetInstrInfo::IMPLICIT_DEF: - case TargetInstrInfo::KILL: - case TargetInstrInfo::DBG_LABEL: - case TargetInstrInfo::EH_LABEL: + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::KILL: + case TargetOpcode::DBG_LABEL: + case TargetOpcode::EH_LABEL: return 0; } break; @@ -470,9 +470,9 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { case ARM::Int_eh_sjlj_setjmp: return 24; case ARM::tInt_eh_sjlj_setjmp: - return 22; + return 14; case ARM::t2Int_eh_sjlj_setjmp: - return 22; + return 14; case ARM::BR_JTr: case ARM::BR_JTm: case ARM::BR_JTadd: @@ -490,6 +490,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { MI->getOperand(NumOps - (TID.isPredicable() ? 
3 : 2)); unsigned JTI = JTOP.getIndex(); const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); + assert(MJTI != 0); const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); assert(JTI < JT.size()); // Thumb instructions are 2 byte aligned, but JT entries are 4 byte diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index ba9e044..91e3550 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -478,7 +478,7 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg, /// bool ARMBaseRegisterInfo::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - return (NoFramePointerElim || + return ((NoFramePointerElim && MFI->hasCalls())|| needsStackRealignment(MF) || MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken()); @@ -583,14 +583,6 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, SmallVector<unsigned, 4> UnspilledCS2GPRs; ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - - // Calculate and set max stack object alignment early, so we can decide - // whether we will need stack realignment (and thus FP). - if (RealignStack) { - MachineFrameInfo *MFI = MF.getFrameInfo(); - MFI->calculateMaxStackAlignment(); - } - // Spill R4 if Thumb2 function requires stack realignment - it will be used as // scratch register. // FIXME: It will be better just to find spare register here. @@ -803,10 +795,10 @@ ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const { } int -ARMBaseRegisterInfo::getFrameIndexReference(MachineFunction &MF, int FI, +ARMBaseRegisterInfo::getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize(); bool isFixed = MFI->isFixedObjectIndex(FI); @@ -845,7 +837,8 @@ ARMBaseRegisterInfo::getFrameIndexReference(MachineFunction &MF, int FI, int -ARMBaseRegisterInfo::getFrameIndexOffset(MachineFunction &MF, int FI) const { +ARMBaseRegisterInfo::getFrameIndexOffset(const MachineFunction &MF, + int FI) const { unsigned FrameReg; return getFrameIndexReference(MF, FI, FrameReg); } diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index f5ca25c..33ba21d 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -107,9 +107,9 @@ public: // Debug information queries. unsigned getRARegister() const; unsigned getFrameRegister(const MachineFunction &MF) const; - int getFrameIndexReference(MachineFunction &MF, int FI, + int getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const; - int getFrameIndexOffset(MachineFunction &MF, int FI) const; + int getFrameIndexOffset(const MachineFunction &MF, int FI) const; // Exception handling queries. 
unsigned getEHExceptionRegister() const; diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index 17e7d44..bd703f4 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -24,9 +24,7 @@ #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/PassManager.h" -#include "llvm/CodeGen/MachineCodeEmitter.h" #include "llvm/CodeGen/JITCodeEmitter.h" -#include "llvm/CodeGen/ObjectCodeEmitter.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" @@ -46,42 +44,34 @@ STATISTIC(NumEmitted, "Number of machine instructions emitted"); namespace { - class ARMCodeEmitter { - public: - /// getBinaryCodeForInstr - This function, generated by the - /// CodeEmitterGenerator using TableGen, produces the binary encoding for - /// machine instructions. - unsigned getBinaryCodeForInstr(const MachineInstr &MI); - }; - - template<class CodeEmitter> - class Emitter : public MachineFunctionPass, public ARMCodeEmitter { + class ARMCodeEmitter : public MachineFunctionPass { ARMJITInfo *JTI; const ARMInstrInfo *II; const TargetData *TD; const ARMSubtarget *Subtarget; TargetMachine &TM; - CodeEmitter &MCE; + JITCodeEmitter &MCE; const std::vector<MachineConstantPoolEntry> *MCPEs; const std::vector<MachineJumpTableEntry> *MJTEs; bool IsPIC; - + void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<MachineModuleInfo>(); MachineFunctionPass::getAnalysisUsage(AU); } - - public: + static char ID; - explicit Emitter(TargetMachine &tm, CodeEmitter &mce) - : MachineFunctionPass(&ID), JTI(0), II(0), TD(0), TM(tm), - MCE(mce), MCPEs(0), MJTEs(0), - IsPIC(TM.getRelocationModel() == Reloc::PIC_) {} - Emitter(TargetMachine &tm, CodeEmitter &mce, - const ARMInstrInfo &ii, const TargetData &td) - : MachineFunctionPass(&ID), JTI(0), II(&ii), TD(&td), TM(tm), - MCE(mce), MCPEs(0), MJTEs(0), - IsPIC(TM.getRelocationModel() == Reloc::PIC_) {} + public: + ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) + : MachineFunctionPass(&ID), JTI(0), II((ARMInstrInfo*)tm.getInstrInfo()), + TD(tm.getTargetData()), TM(tm), + MCE(mce), MCPEs(0), MJTEs(0), + IsPIC(TM.getRelocationModel() == Reloc::PIC_) {} + + /// getBinaryCodeForInstr - This function, generated by the + /// CodeEmitterGenerator using TableGen, produces the binary encoding for + /// machine instructions. + unsigned getBinaryCodeForInstr(const MachineInstr &MI); bool runOnMachineFunction(MachineFunction &MF); @@ -94,21 +84,13 @@ namespace { private: void emitWordLE(unsigned Binary); - void emitDWordLE(uint64_t Binary); - void emitConstPoolInstruction(const MachineInstr &MI); - void emitMOVi2piecesInstruction(const MachineInstr &MI); - void emitLEApcrelJTInstruction(const MachineInstr &MI); - void emitPseudoMoveInstruction(const MachineInstr &MI); - void addPCLabel(unsigned LabelID); - void emitPseudoInstruction(const MachineInstr &MI); - unsigned getMachineSoRegOpValue(const MachineInstr &MI, const TargetInstrDesc &TID, const MachineOperand &MO, @@ -176,28 +158,18 @@ namespace { void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc, intptr_t JTBase = 0); }; - template <class CodeEmitter> - char Emitter<CodeEmitter>::ID = 0; } -/// createARMCodeEmitterPass - Return a pass that emits the collected ARM code -/// to the specified MCE object. 
+char ARMCodeEmitter::ID = 0; -FunctionPass *llvm::createARMCodeEmitterPass(ARMBaseTargetMachine &TM, - MachineCodeEmitter &MCE) { - return new Emitter<MachineCodeEmitter>(TM, MCE); -} +/// createARMJITCodeEmitterPass - Return a pass that emits the collected ARM +/// code to the specified MCE object. FunctionPass *llvm::createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM, JITCodeEmitter &JCE) { - return new Emitter<JITCodeEmitter>(TM, JCE); -} -FunctionPass *llvm::createARMObjectCodeEmitterPass(ARMBaseTargetMachine &TM, - ObjectCodeEmitter &OCE) { - return new Emitter<ObjectCodeEmitter>(TM, OCE); + return new ARMCodeEmitter(TM, JCE); } -template<class CodeEmitter> -bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) { +bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) { assert((MF.getTarget().getRelocationModel() != Reloc::Default || MF.getTarget().getRelocationModel() != Reloc::Static) && "JIT relocation model must be set to static or default!"); @@ -206,7 +178,8 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) { TD = ((ARMTargetMachine&)MF.getTarget()).getTargetData(); Subtarget = &TM.getSubtarget<ARMSubtarget>(); MCPEs = &MF.getConstantPool()->getConstants(); - MJTEs = &MF.getJumpTableInfo()->getJumpTables(); + MJTEs = 0; + if (MF.getJumpTableInfo()) MJTEs = &MF.getJumpTableInfo()->getJumpTables(); IsPIC = TM.getRelocationModel() == Reloc::PIC_; JTI->Initialize(MF, IsPIC); MCE.setModuleInfo(&getAnalysis<MachineModuleInfo>()); @@ -229,8 +202,7 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) { /// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value. /// -template<class CodeEmitter> -unsigned Emitter<CodeEmitter>::getShiftOp(unsigned Imm) const { +unsigned ARMCodeEmitter::getShiftOp(unsigned Imm) const { switch (ARM_AM::getAM2ShiftOpc(Imm)) { default: llvm_unreachable("Unknown shift opc!"); case ARM_AM::asr: return 2; @@ -244,9 +216,8 @@ unsigned Emitter<CodeEmitter>::getShiftOp(unsigned Imm) const { /// getMachineOpValue - Return binary encoding of operand. If the machine /// operand requires relocation, record the relocation and return zero. -template<class CodeEmitter> -unsigned Emitter<CodeEmitter>::getMachineOpValue(const MachineInstr &MI, - const MachineOperand &MO) { +unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI, + const MachineOperand &MO) { if (MO.isReg()) return ARMRegisterInfo::getRegisterNumbering(MO.getReg()); else if (MO.isImm()) @@ -276,10 +247,9 @@ unsigned Emitter<CodeEmitter>::getMachineOpValue(const MachineInstr &MI, /// emitGlobalAddress - Emit the specified address to the code stream. /// -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitGlobalAddress(GlobalValue *GV, unsigned Reloc, - bool MayNeedFarStub, bool Indirect, - intptr_t ACPV) { +void ARMCodeEmitter::emitGlobalAddress(GlobalValue *GV, unsigned Reloc, + bool MayNeedFarStub, bool Indirect, + intptr_t ACPV) { MachineRelocation MR = Indirect ? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc, GV, ACPV, MayNeedFarStub) @@ -291,9 +261,7 @@ void Emitter<CodeEmitter>::emitGlobalAddress(GlobalValue *GV, unsigned Reloc, /// emitExternalSymbolAddress - Arrange for the address of an external symbol to /// be emitted to the current location in the function, and allow it to be PC /// relative. 
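/// (The relocation is recorded against the current PC offset and resolved by
/// the JIT once the external symbol's address is known.)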
-template<class CodeEmitter> -void Emitter<CodeEmitter>::emitExternalSymbolAddress(const char *ES, - unsigned Reloc) { +void ARMCodeEmitter::emitExternalSymbolAddress(const char *ES, unsigned Reloc) { MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(), Reloc, ES)); } @@ -301,9 +269,7 @@ void Emitter<CodeEmitter>::emitExternalSymbolAddress(const char *ES, /// emitConstPoolAddress - Arrange for the address of an constant pool /// to be emitted to the current location in the function, and allow it to be PC /// relative. -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitConstPoolAddress(unsigned CPI, - unsigned Reloc) { +void ARMCodeEmitter::emitConstPoolAddress(unsigned CPI, unsigned Reloc) { // Tell JIT emitter we'll resolve the address. MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(), Reloc, CPI, 0, true)); @@ -312,37 +278,31 @@ void Emitter<CodeEmitter>::emitConstPoolAddress(unsigned CPI, /// emitJumpTableAddress - Arrange for the address of a jump table to /// be emitted to the current location in the function, and allow it to be PC /// relative. -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitJumpTableAddress(unsigned JTIndex, - unsigned Reloc) { +void ARMCodeEmitter::emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) { MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(), Reloc, JTIndex, 0, true)); } /// emitMachineBasicBlock - Emit the specified address basic block. -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitMachineBasicBlock(MachineBasicBlock *BB, - unsigned Reloc, intptr_t JTBase) { +void ARMCodeEmitter::emitMachineBasicBlock(MachineBasicBlock *BB, + unsigned Reloc, intptr_t JTBase) { MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(), Reloc, BB, JTBase)); } -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitWordLE(unsigned Binary) { +void ARMCodeEmitter::emitWordLE(unsigned Binary) { DEBUG(errs() << " 0x"; errs().write_hex(Binary) << "\n"); MCE.emitWordLE(Binary); } -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitDWordLE(uint64_t Binary) { +void ARMCodeEmitter::emitDWordLE(uint64_t Binary) { DEBUG(errs() << " 0x"; errs().write_hex(Binary) << "\n"); MCE.emitDWordLE(Binary); } -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI) { +void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) { DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << MI); MCE.processDebugLoc(MI.getDebugLoc(), true); @@ -411,8 +371,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI) { MCE.processDebugLoc(MI.getDebugLoc(), false); } -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitConstPoolInstruction(const MachineInstr &MI) { +void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) { unsigned CPI = MI.getOperand(0).getImm(); // CP instruction index. unsigned CPIndex = MI.getOperand(1).getIndex(); // Actual cp entry index. 
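emitWordLE and emitDWordLE above stream finished encodings out least-significant byte first, matching the little-endian instruction stream the JIT produces. A self-contained illustration of the byte order:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    std::vector<uint8_t> Out;

    void emitWordLE(uint32_t W) {             // 4 bytes, LSB first
      for (int i = 0; i < 4; ++i)
        Out.push_back(uint8_t(W >> (8 * i)));
    }

    void emitDWordLE(uint64_t D) {            // low word first, then high word
      emitWordLE(uint32_t(D));
      emitWordLE(uint32_t(D >> 32));
    }

    int main() {
      emitWordLE(0xDEADBEEFu);
      for (uint8_t B : Out) std::printf("%02x ", B);  // prints: ef be ad de
    }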
const MachineConstantPoolEntry &MCPE = (*MCPEs)[CPIndex]; @@ -474,8 +433,7 @@ void Emitter<CodeEmitter>::emitConstPoolInstruction(const MachineInstr &MI) { } } -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitMOVi2piecesInstruction(const MachineInstr &MI) { +void ARMCodeEmitter::emitMOVi2piecesInstruction(const MachineInstr &MI) { const MachineOperand &MO0 = MI.getOperand(0); const MachineOperand &MO1 = MI.getOperand(1); assert(MO1.isImm() && ARM_AM::getSOImmVal(MO1.isImm()) != -1 && @@ -517,8 +475,7 @@ void Emitter<CodeEmitter>::emitMOVi2piecesInstruction(const MachineInstr &MI) { emitWordLE(Binary); } -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitLEApcrelJTInstruction(const MachineInstr &MI) { +void ARMCodeEmitter::emitLEApcrelJTInstruction(const MachineInstr &MI) { // It's basically add r, pc, (LJTI - $+8) const TargetInstrDesc &TID = MI.getDesc(); @@ -545,8 +502,7 @@ void Emitter<CodeEmitter>::emitLEApcrelJTInstruction(const MachineInstr &MI) { emitWordLE(Binary); } -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitPseudoMoveInstruction(const MachineInstr &MI) { +void ARMCodeEmitter::emitPseudoMoveInstruction(const MachineInstr &MI) { unsigned Opcode = MI.getDesc().Opcode; // Part of binary is determined by TableGn. @@ -585,21 +541,19 @@ void Emitter<CodeEmitter>::emitPseudoMoveInstruction(const MachineInstr &MI) { emitWordLE(Binary); } -template<class CodeEmitter> -void Emitter<CodeEmitter>::addPCLabel(unsigned LabelID) { +void ARMCodeEmitter::addPCLabel(unsigned LabelID) { DEBUG(errs() << " ** LPC" << LabelID << " @ " << (void*)MCE.getCurrentPCValue() << '\n'); JTI->addPCLabelAddr(LabelID, MCE.getCurrentPCValue()); } -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitPseudoInstruction(const MachineInstr &MI) { +void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) { unsigned Opcode = MI.getDesc().Opcode; switch (Opcode) { default: llvm_unreachable("ARMCodeEmitter::emitPseudoInstruction"); // FIXME: Add support for MOVimm32. - case TargetInstrInfo::INLINEASM: { + case TargetOpcode::INLINEASM: { // We allow inline assembler nodes with empty bodies - they can // implicitly define registers, which is ok for JIT. if (MI.getOperand(0).getSymbolName()[0]) { @@ -607,12 +561,12 @@ void Emitter<CodeEmitter>::emitPseudoInstruction(const MachineInstr &MI) { } break; } - case TargetInstrInfo::DBG_LABEL: - case TargetInstrInfo::EH_LABEL: + case TargetOpcode::DBG_LABEL: + case TargetOpcode::EH_LABEL: MCE.emitLabel(MI.getOperand(0).getImm()); break; - case TargetInstrInfo::IMPLICIT_DEF: - case TargetInstrInfo::KILL: + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::KILL: // Do nothing. 
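emitMOVi2piecesInstruction exists because an ARM data-processing immediate ("so_imm") must be an 8-bit value rotated right by an even amount; values that fail the test get split into a two-instruction sequence. A sketch equivalent in spirit to ARM_AM::getSOImmVal, returning the 12-bit encoding or -1:

    #include <cstdint>

    static uint32_t rotl32(uint32_t V, unsigned Amt) {
      Amt &= 31;
      return Amt ? (V << Amt) | (V >> (32 - Amt)) : V;
    }

    // imm32 = ROR(imm8, 2*rot) with rot in 0..15, so V is encodable iff some
    // even left-rotation of V fits in eight bits.
    int getSOImmValSketch(uint32_t V) {
      for (unsigned Rot = 0; Rot != 16; ++Rot) {
        uint32_t Imm8 = rotl32(V, 2 * Rot);   // undo the right-rotation
        if ((Imm8 & ~0xFFu) == 0)
          return int((Rot << 8) | Imm8);      // rot:4 | imm8 -> 12-bit field
      }
      return -1;
    }

For example, 0xFF000000 encodes as imm8 = 0xFF with rotate 4, while 0x102 fits no even rotation and is exactly the kind of value the two-piece expansion above handles.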
break; case ARM::CONSTPOOL_ENTRY: @@ -661,8 +615,7 @@ void Emitter<CodeEmitter>::emitPseudoInstruction(const MachineInstr &MI) { } } -template<class CodeEmitter> -unsigned Emitter<CodeEmitter>::getMachineSoRegOpValue( +unsigned ARMCodeEmitter::getMachineSoRegOpValue( const MachineInstr &MI, const TargetInstrDesc &TID, const MachineOperand &MO, @@ -721,8 +674,7 @@ unsigned Emitter<CodeEmitter>::getMachineSoRegOpValue( return Binary | ARM_AM::getSORegOffset(MO2.getImm()) << 7; } -template<class CodeEmitter> -unsigned Emitter<CodeEmitter>::getMachineSoImmOpValue(unsigned SoImm) { +unsigned ARMCodeEmitter::getMachineSoImmOpValue(unsigned SoImm) { int SoImmVal = ARM_AM::getSOImmVal(SoImm); assert(SoImmVal != -1 && "Not a valid so_imm value!"); @@ -735,8 +687,7 @@ unsigned Emitter<CodeEmitter>::getMachineSoImmOpValue(unsigned SoImm) { return Binary; } -template<class CodeEmitter> -unsigned Emitter<CodeEmitter>::getAddrModeSBit(const MachineInstr &MI, +unsigned ARMCodeEmitter::getAddrModeSBit(const MachineInstr &MI, const TargetInstrDesc &TID) const { for (unsigned i = MI.getNumOperands(), e = TID.getNumOperands(); i != e; --i){ const MachineOperand &MO = MI.getOperand(i-1); @@ -746,8 +697,7 @@ unsigned Emitter<CodeEmitter>::getAddrModeSBit(const MachineInstr &MI, return 0; } -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitDataProcessingInstruction( +void ARMCodeEmitter::emitDataProcessingInstruction( const MachineInstr &MI, unsigned ImplicitRd, unsigned ImplicitRn) { @@ -813,8 +763,7 @@ void Emitter<CodeEmitter>::emitDataProcessingInstruction( emitWordLE(Binary); } -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitLoadStoreInstruction( +void ARMCodeEmitter::emitLoadStoreInstruction( const MachineInstr &MI, unsigned ImplicitRd, unsigned ImplicitRn) { @@ -889,8 +838,7 @@ void Emitter<CodeEmitter>::emitLoadStoreInstruction( emitWordLE(Binary); } -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitMiscLoadStoreInstruction(const MachineInstr &MI, +void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI, unsigned ImplicitRn) { const TargetInstrDesc &TID = MI.getDesc(); unsigned Form = TID.TSFlags & ARMII::FormMask; @@ -977,8 +925,7 @@ static unsigned getAddrModeUPBits(unsigned Mode) { return Binary; } -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitLoadStoreMultipleInstruction( +void ARMCodeEmitter::emitLoadStoreMultipleInstruction( const MachineInstr &MI) { // Part of binary is determined by TableGn. unsigned Binary = getBinaryCodeForInstr(MI); @@ -1011,8 +958,7 @@ void Emitter<CodeEmitter>::emitLoadStoreMultipleInstruction( emitWordLE(Binary); } -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitMulFrmInstruction(const MachineInstr &MI) { +void ARMCodeEmitter::emitMulFrmInstruction(const MachineInstr &MI) { const TargetInstrDesc &TID = MI.getDesc(); // Part of binary is determined by TableGn. @@ -1049,8 +995,7 @@ void Emitter<CodeEmitter>::emitMulFrmInstruction(const MachineInstr &MI) { emitWordLE(Binary); } -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitExtendInstruction(const MachineInstr &MI) { +void ARMCodeEmitter::emitExtendInstruction(const MachineInstr &MI) { const TargetInstrDesc &TID = MI.getDesc(); // Part of binary is determined by TableGn. 
@@ -1087,8 +1032,7 @@ void Emitter<CodeEmitter>::emitExtendInstruction(const MachineInstr &MI) { emitWordLE(Binary); } -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitMiscArithInstruction(const MachineInstr &MI) { +void ARMCodeEmitter::emitMiscArithInstruction(const MachineInstr &MI) { const TargetInstrDesc &TID = MI.getDesc(); // Part of binary is determined by TableGn. @@ -1126,8 +1070,7 @@ void Emitter<CodeEmitter>::emitMiscArithInstruction(const MachineInstr &MI) { emitWordLE(Binary); } -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitBranchInstruction(const MachineInstr &MI) { +void ARMCodeEmitter::emitBranchInstruction(const MachineInstr &MI) { const TargetInstrDesc &TID = MI.getDesc(); if (TID.Opcode == ARM::TPsoft) { @@ -1146,8 +1089,7 @@ void Emitter<CodeEmitter>::emitBranchInstruction(const MachineInstr &MI) { emitWordLE(Binary); } -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitInlineJumpTable(unsigned JTIndex) { +void ARMCodeEmitter::emitInlineJumpTable(unsigned JTIndex) { // Remember the base address of the inline jump table. uintptr_t JTBase = MCE.getCurrentPCValue(); JTI->addJumpTableBaseAddr(JTIndex, JTBase); @@ -1167,8 +1109,7 @@ void Emitter<CodeEmitter>::emitInlineJumpTable(unsigned JTIndex) { } } -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitMiscBranchInstruction(const MachineInstr &MI) { +void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) { const TargetInstrDesc &TID = MI.getDesc(); // Handle jump tables. @@ -1249,8 +1190,7 @@ static unsigned encodeVFPRm(const MachineInstr &MI, unsigned OpIdx) { return Binary; } -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitVFPArithInstruction(const MachineInstr &MI) { +void ARMCodeEmitter::emitVFPArithInstruction(const MachineInstr &MI) { const TargetInstrDesc &TID = MI.getDesc(); // Part of binary is determined by TableGn. @@ -1289,8 +1229,7 @@ void Emitter<CodeEmitter>::emitVFPArithInstruction(const MachineInstr &MI) { emitWordLE(Binary); } -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitVFPConversionInstruction( +void ARMCodeEmitter::emitVFPConversionInstruction( const MachineInstr &MI) { const TargetInstrDesc &TID = MI.getDesc(); unsigned Form = TID.TSFlags & ARMII::FormMask; @@ -1347,8 +1286,7 @@ void Emitter<CodeEmitter>::emitVFPConversionInstruction( emitWordLE(Binary); } -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitVFPLoadStoreInstruction(const MachineInstr &MI) { +void ARMCodeEmitter::emitVFPLoadStoreInstruction(const MachineInstr &MI) { // Part of binary is determined by TableGn. unsigned Binary = getBinaryCodeForInstr(MI); @@ -1382,8 +1320,7 @@ void Emitter<CodeEmitter>::emitVFPLoadStoreInstruction(const MachineInstr &MI) { emitWordLE(Binary); } -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitVFPLoadStoreMultipleInstruction( +void ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction( const MachineInstr &MI) { // Part of binary is determined by TableGn. unsigned Binary = getBinaryCodeForInstr(MI); @@ -1418,8 +1355,7 @@ void Emitter<CodeEmitter>::emitVFPLoadStoreMultipleInstruction( emitWordLE(Binary); } -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitMiscInstruction(const MachineInstr &MI) { +void ARMCodeEmitter::emitMiscInstruction(const MachineInstr &MI) { // Part of binary is determined by TableGn. 
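The branch emission above leaves the 24-bit offset to relocation resolution: ARM B/BL store a signed word offset relative to the pc value the hardware reads, which is the branch's own address plus 8. A sketch of the fixup a resolver applies once the target address is known:

    #include <cassert>
    #include <cstdint>

    uint32_t encodeBranchOffset(uint32_t Insn, int64_t BranchAddr,
                                int64_t TargetAddr) {
      int64_t Delta = TargetAddr - (BranchAddr + 8);   // pc reads as addr + 8
      assert((Delta & 3) == 0 && "branch target must be word aligned");
      int64_t Imm = Delta >> 2;                        // word offset
      assert(Imm >= -(1 << 23) && Imm < (1 << 23) && "branch out of range");
      return (Insn & 0xFF000000u)                      // keep cond + opcode
           | (uint32_t(Imm) & 0x00FFFFFFu);            // signed 24-bit field
    }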
unsigned Binary = getBinaryCodeForInstr(MI); diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index acd30d2..8fa3c04 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -302,9 +302,9 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { // Thumb1 functions containing constant pools get 4-byte alignment. // This is so we can keep exact track of where the alignment padding goes. - // Set default. Thumb1 function is 2-byte aligned, ARM and Thumb2 are 4-byte - // aligned. - AFI->setAlign(isThumb1 ? 1U : 2U); + // ARM and Thumb2 functions need to be 4-byte aligned. + if (!isThumb1) + MF.EnsureAlignment(2); // 2 = log2(4) // Perform the initial placement of the constant pool entries. To start with, // we put them all at the end of the function. @@ -312,7 +312,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { if (!MCP.isEmpty()) { DoInitialPlacement(MF, CPEMIs); if (isThumb1) - AFI->setAlign(2U); + MF.EnsureAlignment(2); // 2 = log2(4) } /// The next UID to take is the first unused one. @@ -506,7 +506,7 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF, case ARM::tBR_JTr: // A Thumb1 table jump may involve padding; for the offsets to // be right, functions containing these must be 4-byte aligned. - AFI->setAlign(2U); + MF.EnsureAlignment(2U); if ((Offset+MBBSize)%4 != 0 || HasInlineAsm) // FIXME: Add a pseudo ALIGN instruction instead. MBBSize += 2; // padding @@ -732,7 +732,7 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) { // This pass should be run after register allocation, so there should be no // PHI nodes to update. - assert((Succ->empty() || Succ->begin()->getOpcode() != TargetInstrInfo::PHI) + assert((Succ->empty() || !Succ->begin()->isPHI()) && "PHI nodes should be eliminated by now!"); } @@ -1624,6 +1624,8 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) { // FIXME: After the tables are shrunk, can we get rid some of the // constantpool tables? MachineJumpTableInfo *MJTI = MF.getJumpTableInfo(); + if (MJTI == 0) return false; + const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) { MachineInstr *MI = T2JumpTables[i]; @@ -1730,6 +1732,8 @@ bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) { bool MadeChange = false; MachineJumpTableInfo *MJTI = MF.getJumpTableInfo(); + if (MJTI == 0) return false; + const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) { MachineInstr *MI = T2JumpTables[i]; diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index a260050..df4ae70 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -58,6 +58,8 @@ public: return "ARM Instruction Selection"; } + virtual void InstructionSelect(); + /// getI32Imm - Return a target constant of type i32 with the specified /// value. 
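MachineFunction::EnsureAlignment takes a log2 value, so EnsureAlignment(2) in the constant-island hunks above requests 4-byte alignment, and the pass separately has to budget the padding bytes that alignment can introduce. Both calculations in miniature:

    #include <cstdint>
    #include <cstdio>

    // EnsureAlignment-style API: alignment is stored as log2(bytes) and is
    // only ever raised, never lowered.
    struct FunctionInfo {
      unsigned LogAlign = 1;                          // default: 2-byte (Thumb)
      void ensureAlignment(unsigned A) { if (A > LogAlign) LogAlign = A; }
    };

    // Padding needed to bring Offset up to the requested alignment.
    uint64_t paddingTo(uint64_t Offset, unsigned LogAlign) {
      uint64_t Align = uint64_t(1) << LogAlign;
      return (Align - (Offset & (Align - 1))) & (Align - 1);
    }

    int main() {
      FunctionInfo FI;
      FI.ensureAlignment(2);                          // 2 = log2(4), as above
      std::printf("align=%u bytes, pad at offset 6 -> %llu\n", 1u << FI.LogAlign,
                  (unsigned long long)paddingTo(6, FI.LogAlign));  // pads 2
    }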
inline SDValue getI32Imm(unsigned Imm) { @@ -65,7 +67,7 @@ public: } SDNode *Select(SDNode *N); - virtual void InstructionSelect(); + bool SelectShifterOperandReg(SDNode *Op, SDValue N, SDValue &A, SDValue &B, SDValue &C); bool SelectAddrMode2(SDNode *Op, SDValue N, SDValue &Base, @@ -1007,12 +1009,12 @@ SDNode *ARMDAGToDAGISel::SelectDYN_ALLOC(SDNode *N) { SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) { DebugLoc dl = V0.getNode()->getDebugLoc(); SDValue Undef = - SDValue(CurDAG->getMachineNode(TargetInstrInfo::IMPLICIT_DEF, dl, VT), 0); + SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0); SDValue SubReg0 = CurDAG->getTargetConstant(ARM::DSUBREG_0, MVT::i32); SDValue SubReg1 = CurDAG->getTargetConstant(ARM::DSUBREG_1, MVT::i32); - SDNode *Pair = CurDAG->getMachineNode(TargetInstrInfo::INSERT_SUBREG, dl, + SDNode *Pair = CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl, VT, Undef, V0, SubReg0); - return CurDAG->getMachineNode(TargetInstrInfo::INSERT_SUBREG, dl, + return CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl, VT, SDValue(Pair, 0), V1, SubReg1); } diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 76c6a27..adf1644 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -863,7 +863,8 @@ ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl); } return DAG.getStore(Chain, dl, Arg, PtrOff, - PseudoSourceValue::getStack(), LocMemOffset); + PseudoSourceValue::getStack(), LocMemOffset, + false, false, 0); } void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG, @@ -897,11 +898,13 @@ void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG, SDValue ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, + bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) { + // ARM target does not yet support tail call optimization. + isTailCall = false; // Analyze operands of the call, assigning locations to each operand. 
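LowerCall now receives isTailCall by reference, so a target without tail-call support can veto the optimization in place, as the "isTailCall = false" line above does. The out-parameter pattern in isolation:

    // Sketch of the veto pattern: the caller proposes a tail call, the
    // target may decline by clearing the flag and lowering an ordinary call.
    bool lowerCallSketch(bool &isTailCall) {
      isTailCall = false;   // this target does not yet support tail calls
      return true;          // continue lowering as a normal call
    }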
SmallVector<CCValAssign, 16> ArgLocs; @@ -1029,7 +1032,8 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); Callee = DAG.getNode(ARMISD::PIC_ADD, dl, getPointerTy(), Callee, PICLabel); @@ -1050,7 +1054,8 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); Callee = DAG.getNode(ARMISD::PIC_ADD, dl, getPointerTy(), Callee, PICLabel); @@ -1236,7 +1241,8 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) { } CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr); SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); if (RelocM == Reloc::Static) return Result; SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); @@ -1259,7 +1265,8 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4); Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); SDValue Chain = Argument.getValue(1); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); @@ -1306,21 +1313,24 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); Chain = Offset.getValue(1); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel); Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); } else { // local exec model ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, "tpoff"); Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); } // The address of the thread local variable is the add of the thread @@ -1356,13 +1366,15 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); SDValue Chain = Result.getValue(1); SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT); if (!UseGOTOFF) Result = DAG.getLoad(PtrVT, dl, Chain, Result, - PseudoSourceValue::getGOT(), 0); + 
PseudoSourceValue::getGOT(), 0, + false, false, 0); return Result; } else { // If we have T2 ops, we can materialize the address directly via movt/movw @@ -1374,7 +1386,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); } } } @@ -1401,7 +1414,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); SDValue Chain = Result.getValue(1); if (RelocM == Reloc::PIC_) { @@ -1411,7 +1425,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) Result = DAG.getLoad(PtrVT, dl, Chain, Result, - PseudoSourceValue::getGOT(), 0); + PseudoSourceValue::getGOT(), 0, + false, false, 0); return Result; } @@ -1432,13 +1447,15 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); } SDValue -ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { +ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *Subtarget) { unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); DebugLoc dl = Op.getDebugLoc(); switch (IntNo) { @@ -1464,7 +1481,8 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); SDValue Chain = Result.getValue(1); if (RelocM == Reloc::PIC_) { @@ -1474,7 +1492,11 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { return Result; } case Intrinsic::eh_sjlj_setjmp: - return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(1)); + SDValue Val = Subtarget->isThumb() ? + DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::SP, MVT::i32) : + DAG.getConstant(0, MVT::i32); + return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(1), + Val); } } @@ -1508,7 +1530,8 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); - return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0); + return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0, + false, false, 0); } SDValue @@ -1585,7 +1608,8 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, // Create load node to retrieve arguments from the stack. 
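Every DAG.getLoad/DAG.getStore call in this file grows the same three trailing arguments, passed as "false, false, 0". A sketch of the apparent widened signature; the parameter names (isVolatile, isNonTemporal, Alignment) are inferred from the call sites, not quoted from the headers:

    // Hypothetical stand-ins illustrating the signature change only.
    struct SDValue {};

    SDValue getLoadOld(SDValue Chain, SDValue Ptr, int Offset);  // before

    SDValue getLoadNew(SDValue Chain, SDValue Ptr, int Offset,
                       bool isVolatile, bool isNonTemporal, unsigned Alignment) {
      (void)isVolatile; (void)isNonTemporal; (void)Alignment;
      return SDValue{};
    }

    // The mechanical update applied at every caller above, preserving the
    // old default behavior:
    SDValue example(SDValue Chain, SDValue Ptr) {
      return getLoadNew(Chain, Ptr, /*Offset=*/0,
                        /*isVolatile=*/false, /*isNonTemporal=*/false,
                        /*Alignment=*/0);  // 0 = let the backend infer it
    }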
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, - PseudoSourceValue::getFixedStack(FI), 0); + PseudoSourceValue::getFixedStack(FI), 0, + false, false, 0); } else { Reg = MF.addLiveIn(NextVA.getLocReg(), RC); ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); @@ -1700,7 +1724,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // Create load nodes to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, - PseudoSourceValue::getFixedStack(FI), 0)); + PseudoSourceValue::getFixedStack(FI), 0, + false, false, 0)); } } @@ -1738,7 +1763,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, - PseudoSourceValue::getFixedStack(VarArgsFrameIndex), 0); + PseudoSourceValue::getFixedStack(VarArgsFrameIndex), 0, + false, false, 0); MemOps.push_back(Store); FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, DAG.getConstant(4, getPointerTy())); @@ -1932,13 +1958,14 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) { } if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, - PseudoSourceValue::getJumpTable(), 0); + PseudoSourceValue::getJumpTable(), 0, + false, false, 0); Chain = Addr.getValue(1); Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table); return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); } else { Addr = DAG.getLoad(PTy, dl, Chain, Addr, - PseudoSourceValue::getJumpTable(), 0); + PseudoSourceValue::getJumpTable(), 0, false, false, 0); Chain = Addr.getValue(1); return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); } @@ -1986,7 +2013,8 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { ? 
ARM::R7 : ARM::R11; SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); while (Depth--) - FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0); + FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0, + false, false, 0); return FrameAddr; } @@ -2031,7 +2059,7 @@ ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, Loads[i] = DAG.getLoad(VT, dl, Chain, DAG.getNode(ISD::ADD, dl, MVT::i32, Src, DAG.getConstant(SrcOff, MVT::i32)), - SrcSV, SrcSVOff + SrcOff); + SrcSV, SrcSVOff + SrcOff, false, false, 0); TFOps[i] = Loads[i].getValue(1); SrcOff += VTSize; } @@ -2040,9 +2068,9 @@ ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, for (i = 0; i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { TFOps[i] = DAG.getStore(Chain, dl, Loads[i], - DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, - DAG.getConstant(DstOff, MVT::i32)), - DstSV, DstSVOff + DstOff); + DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, + DAG.getConstant(DstOff, MVT::i32)), + DstSV, DstSVOff + DstOff, false, false, 0); DstOff += VTSize; } Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); @@ -2068,7 +2096,7 @@ ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, Loads[i] = DAG.getLoad(VT, dl, Chain, DAG.getNode(ISD::ADD, dl, MVT::i32, Src, DAG.getConstant(SrcOff, MVT::i32)), - SrcSV, SrcSVOff + SrcOff); + SrcSV, SrcSVOff + SrcOff, false, false, 0); TFOps[i] = Loads[i].getValue(1); ++i; SrcOff += VTSize; @@ -2090,7 +2118,7 @@ ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, TFOps[i] = DAG.getStore(Chain, dl, Loads[i], DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, DAG.getConstant(DstOff, MVT::i32)), - DstSV, DstSVOff + DstOff); + DstSV, DstSVOff + DstOff, false, false, 0); ++i; DstOff += VTSize; BytesLeft -= VTSize; @@ -3023,7 +3051,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { case ISD::RETURNADDR: break; case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); - case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); + case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG, + Subtarget); case ISD::BIT_CONVERT: return ExpandBIT_CONVERT(Op.getNode(), DAG); case ISD::SHL: case ISD::SRL: @@ -3852,8 +3881,11 @@ bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const { if (!Subtarget->hasV6Ops()) // Pre-v6 does not support unaligned mem access. return false; - else if (!Subtarget->hasV6Ops()) { - // v6 may or may not support unaligned mem access. + else { + // v6+ may or may not support unaligned mem access depending on the system + // configuration. + // FIXME: This is pretty conservative. Should we provide cmdline option to + // control the behaviour? 
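The rewritten allowsUnalignedMemoryAccesses (its final Darwin check continues just below) replaces an unreachable "else if (!Subtarget->hasV6Ops())" with a plain else: pre-v6 cores never support unaligned access, v6 and later may or may not depending on system configuration (the SCTLR alignment bits), and only Darwin targets are currently assumed to enable it. The same logic as a pure function:

    // Control flow of the predicate after this change, with the Subtarget
    // queries reduced to booleans.
    bool allowsUnalignedMemAccess(bool HasV6Ops, bool IsTargetDarwin) {
      if (!HasV6Ops)
        return false;        // pre-v6 has no unaligned loads/stores
      // v6+ depends on system configuration; the code above conservatively
      // answers yes only for Darwin targets.
      return IsTargetDarwin;
    }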
if (!Subtarget->isTargetDarwin()) return false; } diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index cd9c027..3c5df45 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -278,7 +278,8 @@ namespace llvm { const CCValAssign &VA, ISD::ArgFlagsTy Flags); SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG); - SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG); + SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *Subtarget); SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG); SDValue LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG); SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG); @@ -319,7 +320,7 @@ namespace llvm { virtual SDValue LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, + bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 28b2821..db60458 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -56,6 +56,9 @@ def NEONGetLnFrm : Format<25>; def NEONSetLnFrm : Format<26>; def NEONDupFrm : Format<27>; +def MiscFrm : Format<29>; +def ThumbMiscFrm : Format<30>; + // Misc flags. // the instruction has a Rn register operand. @@ -1246,75 +1249,99 @@ class AXSI5<dag oops, dag iops, InstrItinClass itin, } // Double precision, unary -class ADuI<bits<8> opcod1, bits<4> opcod2, bits<4> opcod3, dag oops, dag iops, - InstrItinClass itin, string opc, string asm, list<dag> pattern> +class ADuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, + bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc, + string asm, list<dag> pattern> : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, pattern> { - let Inst{27-20} = opcod1; - let Inst{19-16} = opcod2; + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{19-16} = opcod3; let Inst{11-8} = 0b1011; - let Inst{7-4} = opcod3; + let Inst{7-6} = opcod4; + let Inst{4} = opcod5; } // Double precision, binary -class ADbI<bits<8> opcod, dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list<dag> pattern> +class ADbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, + dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> { - let Inst{27-20} = opcod; + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; let Inst{11-8} = 0b1011; + let Inst{6} = op6; + let Inst{4} = op4; } // Single precision, unary -class ASuI<bits<8> opcod1, bits<4> opcod2, bits<4> opcod3, dag oops, dag iops, - InstrItinClass itin, string opc, string asm, list<dag> pattern> +class ASuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, + bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc, + string asm, list<dag> pattern> : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, pattern> { - // Bits 22 (D bit) and 5 (M bit) will be changed during instruction encoding. 
- let Inst{27-20} = opcod1; - let Inst{19-16} = opcod2; + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{19-16} = opcod3; let Inst{11-8} = 0b1010; - let Inst{7-4} = opcod3; + let Inst{7-6} = opcod4; + let Inst{4} = opcod5; } // Single precision unary, if no NEON // Same as ASuI except not available if NEON is enabled -class ASuIn<bits<8> opcod1, bits<4> opcod2, bits<4> opcod3, dag oops, dag iops, - InstrItinClass itin, string opc, string asm, list<dag> pattern> - : ASuI<opcod1, opcod2, opcod3, oops, iops, itin, opc, asm, pattern> { +class ASuIn<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, + bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc, + string asm, list<dag> pattern> + : ASuI<opcod1, opcod2, opcod3, opcod4, opcod5, oops, iops, itin, opc, asm, + pattern> { list<Predicate> Predicates = [HasVFP2,DontUseNEONForFP]; } // Single precision, binary -class ASbI<bits<8> opcod, dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list<dag> pattern> +class ASbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, list<dag> pattern> : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> { - // Bit 22 (D bit) can be changed during instruction encoding. - let Inst{27-20} = opcod; + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; let Inst{11-8} = 0b1010; + let Inst{6} = op6; + let Inst{4} = op4; } // Single precision binary, if no NEON // Same as ASbI except not available if NEON is enabled -class ASbIn<bits<8> opcod, dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list<dag> pattern> - : ASbI<opcod, oops, iops, itin, opc, asm, pattern> { +class ASbIn<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, + dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> + : ASbI<opcod1, opcod2, op6, op4, oops, iops, itin, opc, asm, pattern> { list<Predicate> Predicates = [HasVFP2,DontUseNEONForFP]; } // VFP conversion instructions -class AVConv1I<bits<8> opcod1, bits<4> opcod2, bits<4> opcod3, - dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list<dag> pattern> +class AVConv1I<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4, + dag oops, dag iops, InstrItinClass itin, string opc, string asm, + list<dag> pattern> : VFPAI<oops, iops, VFPConv1Frm, itin, opc, asm, pattern> { - let Inst{27-20} = opcod1; - let Inst{19-16} = opcod2; - let Inst{11-8} = opcod3; + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{19-16} = opcod3; + let Inst{11-8} = opcod4; let Inst{6} = 1; + let Inst{4} = 0; +} + +// VFP conversion between floating-point and fixed-point +class AVConv1XI<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5, + dag oops, dag iops, InstrItinClass itin, string opc, string asm, + list<dag> pattern> + : AVConv1I<op1, op2, op3, op4, oops, iops, itin, opc, asm, pattern> { + // size (fixed-point number): sx == 0 ? 
16 : 32 + let Inst{7} = op5; // sx } // VFP conversion instructions, if no NEON -class AVConv1In<bits<8> opcod1, bits<4> opcod2, bits<4> opcod3, +class AVConv1In<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : AVConv1I<opcod1, opcod2, opcod3, oops, iops, itin, opc, asm, pattern> { + : AVConv1I<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm, + pattern> { list<Predicate> Predicates = [HasVFP2,DontUseNEONForFP]; } diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index af508ee..1c6f78a 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -44,7 +44,8 @@ def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisPtrTy<1>, SDTCisVT<2, i32>]>; def SDT_ARMThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>; -def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisPtrTy<1>]>; +def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>, + SDTCisInt<2>]>; def SDT_ARMMEMBARRIERV7 : SDTypeProfile<0, 0, []>; def SDT_ARMSYNCBARRIERV7 : SDTypeProfile<0, 0, []>; @@ -604,6 +605,102 @@ PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary, [(ARMcallseq_start timm:$amt)]>; } +def NOP : AI<(outs), (ins), MiscFrm, NoItinerary, "nop", "", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6T2]> { + let Inst{27-16} = 0b001100100000; + let Inst{7-0} = 0b00000000; +} + +def YIELD : AI<(outs), (ins), MiscFrm, NoItinerary, "yield", "", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6T2]> { + let Inst{27-16} = 0b001100100000; + let Inst{7-0} = 0b00000001; +} + +def WFE : AI<(outs), (ins), MiscFrm, NoItinerary, "wfe", "", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6T2]> { + let Inst{27-16} = 0b001100100000; + let Inst{7-0} = 0b00000010; +} + +def WFI : AI<(outs), (ins), MiscFrm, NoItinerary, "wfi", "", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6T2]> { + let Inst{27-16} = 0b001100100000; + let Inst{7-0} = 0b00000011; +} + +def SEV : AI<(outs), (ins), MiscFrm, NoItinerary, "sev", "", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6T2]> { + let Inst{27-16} = 0b001100100000; + let Inst{7-0} = 0b00000100; +} + +// The i32imm operand $val can be used by a debugger to store more information +// about the breakpoint. +def BKPT : AI<(outs), (ins i32imm:$val), MiscFrm, NoItinerary, "bkpt", "\t$val", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM]> { + let Inst{27-20} = 0b00010010; + let Inst{7-4} = 0b0111; +} + +// Change Processor State is a system instruction -- for disassembly only. 
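The NOP/YIELD/WFE/WFI/SEV hints above share the fixed field Inst{27-16} = 0b001100100000 and differ only in Inst{7-0}. A check that these assemble to the expected words, assuming bits 15-8 are 0b11110000 as in the architected hint space (those bits are not spelled out in the .td fragment):

    #include <cassert>
    #include <cstdint>

    uint32_t encodeHint(uint8_t Hint, uint8_t Cond = 0xE /*AL*/) {
      return (uint32_t(Cond) << 28)      // Inst{31-28}: condition
           | (0b001100100000u << 16)     // Inst{27-16}: fixed hint opcode
           | (0xFu << 12)                // Inst{15-12} (assumed, see above)
           | Hint;                       // Inst{7-0}: 0=nop 1=yield 2=wfe ...
    }

    int main() {
      assert(encodeHint(0) == 0xE320F000u);  // nop
      assert(encodeHint(3) == 0xE320F003u);  // wfi
    }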
+// The singleton $opt operand contains the following information: +// opt{4-0} = mode from Inst{4-0} +// opt{5} = changemode from Inst{17} +// opt{8-6} = AIF from Inst{8-6} +// opt{10-9} = imod from Inst{19-18} with 0b10 as enable and 0b11 as disable +def CPS : AXI<(outs),(ins i32imm:$opt), MiscFrm, NoItinerary, "cps${opt:cps}", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM]> { + let Inst{31-28} = 0b1111; + let Inst{27-20} = 0b00010000; + let Inst{16} = 0; + let Inst{5} = 0; +} + +def SETENDBE : AXI<(outs),(ins), MiscFrm, NoItinerary, "setend\tbe", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM]> { + let Inst{31-28} = 0b1111; + let Inst{27-20} = 0b00010000; + let Inst{16} = 1; + let Inst{9} = 1; + let Inst{7-4} = 0b0000; +} + +def SETENDLE : AXI<(outs),(ins), MiscFrm, NoItinerary, "setend\tle", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM]> { + let Inst{31-28} = 0b1111; + let Inst{27-20} = 0b00010000; + let Inst{16} = 1; + let Inst{9} = 0; + let Inst{7-4} = 0b0000; +} + +def DBG : AI<(outs), (ins i32imm:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV7]> { + let Inst{27-16} = 0b001100100000; + let Inst{7-4} = 0b1111; +} + +// A5.4 Permanently UNDEFINED instructions. +def TRAP : AI<(outs), (ins), MiscFrm, NoItinerary, "trap", "", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM]> { + let Inst{27-25} = 0b011; + let Inst{24-20} = 0b11111; + let Inst{7-5} = 0b111; + let Inst{4} = 0b1; +} + // Address computation and loads and stores in PIC mode. let isNotDuplicable = 1 in { def PICADD : AXI1<0b0100, (outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p), @@ -826,6 +923,20 @@ let isBranch = 1, isTerminator = 1 in { [/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]>; } +// Branch and Exchange Jazelle -- for disassembly only +def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func", + [/* For disassembly only; pattern left blank */]> { + let Inst{23-20} = 0b0010; + //let Inst{19-8} = 0xfff; + let Inst{7-4} = 0b0010; +} + +// Supervisor call (software interrupt) -- for disassembly only +let isCall = 1 in { +def SVC : ABI<0b1111, (outs), (ins i32imm:$svc), IIC_Br, "svc", "\t$svc", + [/* For disassembly only; pattern left blank */]>; +} + //===----------------------------------------------------------------------===// // Load / store Instructions. // @@ -908,6 +1019,20 @@ def LDRSB_POST: AI3ldsbpo<(outs GPR:$dst, GPR:$base_wb), "ldrsb", "\t$dst, [$base], $offset", "$base = $base_wb", []>; } +// LDRT and LDRBT are for disassembly only. + +def LDRT : AI2ldwpo<(outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base, am2offset:$offset), LdFrm, IIC_iLoadru, + "ldrt", "\t$dst, [$base], $offset", "$base = $base_wb", []> { + let Inst{21} = 1; // overwrite +} + +def LDRBT : AI2ldbpo<(outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base,am2offset:$offset), LdFrm, IIC_iLoadru, + "ldrb", "\t$dst, [$base], $offset", "$base = $base_wb", []> { + let Inst{21} = 1; // overwrite +} + // Store def STR : AI2stw<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer, "str", "\t$src, $addr", @@ -971,6 +1096,24 @@ def STRB_POST: AI2stbpo<(outs GPR:$base_wb), [(set GPR:$base_wb, (post_truncsti8 GPR:$src, GPR:$base, am2offset:$offset))]>; +// STRT and STRBT are for disassembly only. 
+ +def STRT : AI2stwpo<(outs GPR:$base_wb), + (ins GPR:$src, GPR:$base,am2offset:$offset), + StFrm, IIC_iStoreru, + "strt", "\t$src, [$base], $offset", "$base = $base_wb", + [/* For disassembly only; pattern left blank */]> { + let Inst{21} = 1; // overwrite +} + +def STRBT : AI2stbpo<(outs GPR:$base_wb), + (ins GPR:$src, GPR:$base,am2offset:$offset), + StFrm, IIC_iStoreru, + "strbt", "\t$src, [$base], $offset", "$base = $base_wb", + [/* For disassembly only; pattern left blank */]> { + let Inst{21} = 1; // overwrite +} + //===----------------------------------------------------------------------===// // Load / store multiple Instructions. // @@ -1015,7 +1158,7 @@ def MOVi16 : AI1<0b1000, (outs GPR:$dst), (ins i32imm:$src), DPFrm, IIC_iMOVi, "movw", "\t$dst, $src", [(set GPR:$dst, imm0_65535:$src)]>, - Requires<[IsARM, HasV6T2]> { + Requires<[IsARM, HasV6T2]>, UnaryDP { let Inst{20} = 0; let Inst{25} = 1; } @@ -1215,6 +1358,33 @@ def : ARMPat<(add GPR:$src, so_imm_neg:$imm), // (mul X, 2^n+1) -> (add (X << n), X) // (mul X, 2^n-1) -> (rsb X, (X << n)) +// Saturating adds/subtracts -- for disassembly only + +// GPR:$dst = GPR:$a op GPR:$b +class AQI<bits<8> op27_20, bits<4> op7_4, string opc> + : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, IIC_iALUr, + opc, "\t$dst, $a, $b", + [/* For disassembly only; pattern left blank */]> { + let Inst{27-20} = op27_20; + let Inst{7-4} = op7_4; +} + +def QADD : AQI<0b00010000, 0b0101, "qadd">; +def QADD16 : AQI<0b01100010, 0b0001, "qadd16">; +def QADD8 : AQI<0b01100010, 0b1001, "qadd8">; +def QASX : AQI<0b01100010, 0b0011, "qasx">; +def QDADD : AQI<0b00010100, 0b0101, "qdadd">; +def QDSUB : AQI<0b00010110, 0b0101, "qdsub">; +def QSAX : AQI<0b01100010, 0b0101, "qsax">; +def QSUB : AQI<0b00010010, 0b0101, "qsub">; +def QSUB16 : AQI<0b01100010, 0b0111, "qsub16">; +def QSUB8 : AQI<0b01100010, 0b1111, "qsub8">; +def UQADD16 : AQI<0b01100110, 0b0001, "uqadd16">; +def UQADD8 : AQI<0b01100110, 0b1001, "uqadd8">; +def UQASX : AQI<0b01100110, 0b0011, "uqasx">; +def UQSAX : AQI<0b01100110, 0b0101, "uqsax">; +def UQSUB16 : AQI<0b01100110, 0b0111, "uqsub16">; +def UQSUB8 : AQI<0b01100110, 0b1111, "uqsub8">; //===----------------------------------------------------------------------===// // Bitwise Instructions. 
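The saturating instructions defined just above are wired up for disassembly only, but their semantics are well defined; QADD, for instance, is a signed 32-bit add that clamps instead of wrapping. A reference model (the sticky Q flag the real instruction also sets on saturation is omitted):

    #include <cassert>
    #include <cstdint>
    #include <limits>

    int32_t qadd(int32_t A, int32_t B) {
      int64_t Sum = int64_t(A) + int64_t(B);   // widen so the sum cannot wrap
      if (Sum > std::numeric_limits<int32_t>::max())
        return std::numeric_limits<int32_t>::max();
      if (Sum < std::numeric_limits<int32_t>::min())
        return std::numeric_limits<int32_t>::min();
      return int32_t(Sum);
    }

    int main() {
      assert(qadd(0x7FFFFFFF, 1) == 0x7FFFFFFF);  // clamps, does not wrap
      assert(qadd(-2, -0x7FFFFFFF) == INT32_MIN);
    }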
@@ -1241,11 +1411,14 @@ def BFC : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm), def MVNr : AsI1<0b1111, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr, "mvn", "\t$dst, $src", [(set GPR:$dst, (not GPR:$src))]>, UnaryDP { + let Inst{25} = 0; let Inst{11-4} = 0b00000000; } def MVNs : AsI1<0b1111, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm, IIC_iMOVsr, "mvn", "\t$dst, $src", - [(set GPR:$dst, (not so_reg:$src))]>, UnaryDP; + [(set GPR:$dst, (not so_reg:$src))]>, UnaryDP { + let Inst{25} = 0; +} let isReMaterializable = 1, isAsCheapAsAMove = 1 in def MVNi : AsI1<0b1111, (outs GPR:$dst), (ins so_imm:$imm), DPFrm, IIC_iMOVi, "mvn", "\t$dst, $imm", @@ -1442,7 +1615,39 @@ multiclass AI_smla<string opc, PatFrag opnode> { defm SMUL : AI_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>; defm SMLA : AI_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>; -// TODO: Halfword multiple accumulate long: SMLAL<x><y> +// Halfword multiply accumulate long: SMLAL<x><y> -- for disassembly only +def SMLALBB : AMulxyI<0b0001010,(outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b), + IIC_iMAC64, "smlalbb", "\t$ldst, $hdst, $a, $b", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 0; + let Inst{6} = 0; +} + +def SMLALBT : AMulxyI<0b0001010,(outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b), + IIC_iMAC64, "smlalbt", "\t$ldst, $hdst, $a, $b", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 0; + let Inst{6} = 1; +} + +def SMLALTB : AMulxyI<0b0001010,(outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b), + IIC_iMAC64, "smlaltb", "\t$ldst, $hdst, $a, $b", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 1; + let Inst{6} = 0; +} + +def SMLALTT : AMulxyI<0b0001010,(outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b), + IIC_iMAC64, "smlaltt", "\t$ldst, $hdst, $a, $b", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 1; + let Inst{6} = 1; +} + // TODO: Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD //===----------------------------------------------------------------------===// @@ -1773,6 +1978,27 @@ def STREXD : AIstrex<0b01, (outs GPR:$success), []>; } +// SWP/SWPB are deprecated in V6/V7 and for disassembly only. +let mayLoad = 1 in { +def SWP : AI<(outs GPR:$dst), (ins GPR:$src, GPR:$ptr), LdStExFrm, NoItinerary, + "swp", "\t$dst, $src, [$ptr]", + [/* For disassembly only; pattern left blank */]> { + let Inst{27-23} = 0b00010; + let Inst{22} = 0; // B = 0 + let Inst{21-20} = 0b00; + let Inst{7-4} = 0b1001; +} + +def SWPB : AI<(outs GPR:$dst), (ins GPR:$src, GPR:$ptr), LdStExFrm, NoItinerary, + "swpb", "\t$dst, $src, [$ptr]", + [/* For disassembly only; pattern left blank */]> { + let Inst{27-23} = 0b00010; + let Inst{22} = 1; // B = 1 + let Inst{21-20} = 0b00; + let Inst{7-4} = 0b1001; +} +} + //===----------------------------------------------------------------------===// // TLS Instructions // @@ -1797,21 +2023,22 @@ let isCall = 1, // except for our own input by listing the relevant registers in Defs. By // doing so, we also cause the prologue/epilogue code to actively preserve // all of the callee-saved resgisters, which is exactly what we want. -let Defs = +// A constant value is passed in $val, and we use the location as a scratch. 
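The SMLAL<x><y> family defined above multiplies two selected 16-bit halves and accumulates the product into a 64-bit value spread across $ldst/$hdst. Reference semantics for the bottom-bottom variant:

    #include <cstdint>

    // SMLALBB: sign-extend the bottom halfwords of Rn and Rm, multiply, and
    // add the product into the 64-bit accumulator held in RdHi:RdLo.
    void smlalbb(uint32_t &RdLo, uint32_t &RdHi, uint32_t Rn, uint32_t Rm) {
      int64_t Acc = (int64_t(RdHi) << 32) | RdLo;
      Acc += int64_t(int16_t(Rn)) * int16_t(Rm);   // 16x16 -> 64-bit add
      RdLo = uint32_t(Acc);
      RdHi = uint32_t(uint64_t(Acc) >> 32);
    }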
+let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15, D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30, D31 ] in { - def Int_eh_sjlj_setjmp : XI<(outs), (ins GPR:$src), + def Int_eh_sjlj_setjmp : XI<(outs), (ins GPR:$src, GPR:$val), AddrModeNone, SizeSpecial, IndexModeNone, Pseudo, NoItinerary, "str\tsp, [$src, #+8] @ eh_setjmp begin\n\t" - "add\tr12, pc, #8\n\t" - "str\tr12, [$src, #+4]\n\t" + "add\t$val, pc, #8\n\t" + "str\t$val, [$src, #+4]\n\t" "mov\tr0, #0\n\t" "add\tpc, pc, #0\n\t" "mov\tr0, #1 @ eh_setjmp end", "", - [(set R0, (ARMeh_sjlj_setjmp GPR:$src))]>; + [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>; } //===----------------------------------------------------------------------===// @@ -1954,3 +2181,116 @@ include "ARMInstrVFP.td" // include "ARMInstrNEON.td" + +//===----------------------------------------------------------------------===// +// Coprocessor Instructions. For disassembly only. +// + +def CDP : ABI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1, + nohash_imm:$CRd, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2), + NoItinerary, "cdp", "\tp$cop, $opc1, cr$CRd, cr$CRn, cr$CRm, $opc2", + [/* For disassembly only; pattern left blank */]> { + let Inst{4} = 0; +} + +def CDP2 : ABXI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1, + nohash_imm:$CRd, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2), + NoItinerary, "cdp2\tp$cop, $opc1, cr$CRd, cr$CRn, cr$CRm, $opc2", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-28} = 0b1111; + let Inst{4} = 0; +} + +def MCR : ABI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1, + GPR:$Rt, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2), + NoItinerary, "mcr", "\tp$cop, $opc1, $Rt, cr$CRn, cr$CRm, $opc2", + [/* For disassembly only; pattern left blank */]> { + let Inst{20} = 0; + let Inst{4} = 1; +} + +def MCR2 : ABXI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1, + GPR:$Rt, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2), + NoItinerary, "mcr2\tp$cop, $opc1, $Rt, cr$CRn, cr$CRm, $opc2", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-28} = 0b1111; + let Inst{20} = 0; + let Inst{4} = 1; +} + +def MRC : ABI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1, + GPR:$Rt, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2), + NoItinerary, "mrc", "\tp$cop, $opc1, $Rt, cr$CRn, cr$CRm, $opc2", + [/* For disassembly only; pattern left blank */]> { + let Inst{20} = 1; + let Inst{4} = 1; +} + +def MRC2 : ABXI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1, + GPR:$Rt, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2), + NoItinerary, "mrc2\tp$cop, $opc1, $Rt, cr$CRn, cr$CRm, $opc2", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-28} = 0b1111; + let Inst{20} = 1; + let Inst{4} = 1; +} + +def MCRR : ABI<0b1100, (outs), (ins nohash_imm:$cop, i32imm:$opc, + GPR:$Rt, GPR:$Rt2, nohash_imm:$CRm), + NoItinerary, "mcrr", "\tp$cop, $opc, $Rt, $Rt2, cr$CRm", + [/* For disassembly only; pattern left blank */]> { + let Inst{23-20} = 0b0100; +} + +def MCRR2 : ABXI<0b1100, (outs), (ins nohash_imm:$cop, i32imm:$opc, + GPR:$Rt, GPR:$Rt2, nohash_imm:$CRm), + NoItinerary, "mcrr2\tp$cop, $opc, $Rt, $Rt2, cr$CRm", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-28} = 0b1111; + let Inst{23-20} = 0b0100; +} + +def MRRC : ABI<0b1100, (outs), (ins nohash_imm:$cop, i32imm:$opc, + GPR:$Rt, GPR:$Rt2, nohash_imm:$CRm), + NoItinerary, "mrrc", "\tp$cop, $opc, $Rt, $Rt2, 
cr$CRm", + [/* For disassembly only; pattern left blank */]> { + let Inst{23-20} = 0b0101; +} + +def MRRC2 : ABXI<0b1100, (outs), (ins nohash_imm:$cop, i32imm:$opc, + GPR:$Rt, GPR:$Rt2, nohash_imm:$CRm), + NoItinerary, "mrrc2\tp$cop, $opc, $Rt, $Rt2, cr$CRm", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-28} = 0b1111; + let Inst{23-20} = 0b0101; +} + +//===----------------------------------------------------------------------===// +// Move between special register and ARM core register -- for disassembly only +// + +def MRS : ABI<0b0001,(outs GPR:$dst),(ins), NoItinerary, "mrs", "\t$dst, cpsr", + [/* For disassembly only; pattern left blank */]> { + let Inst{23-20} = 0b0000; + let Inst{7-4} = 0b0000; +} + +def MRSsys : ABI<0b0001,(outs GPR:$dst),(ins), NoItinerary,"mrs","\t$dst, spsr", + [/* For disassembly only; pattern left blank */]> { + let Inst{23-20} = 0b0100; + let Inst{7-4} = 0b0000; +} + +// FIXME: mask is ignored for the time being. +def MSR : ABI<0b0001,(outs),(ins GPR:$src), NoItinerary, "mrs", "\tcpsr, $src", + [/* For disassembly only; pattern left blank */]> { + let Inst{23-20} = 0b0010; + let Inst{7-4} = 0b0000; +} + +// FIXME: mask is ignored for the time being. +def MSRsys : ABI<0b0001,(outs),(ins GPR:$src),NoItinerary,"mrs","\tspsr, $src", + [/* For disassembly only; pattern left blank */]> { + let Inst{23-20} = 0b0110; + let Inst{7-4} = 0b0000; +} diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index cd063bf..e2be7ba 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -2192,9 +2192,27 @@ def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst), // VBIF : Vector Bitwise Insert if False // like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst", +def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1, + (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), + IIC_VBINiD, "vbif", "$dst, $src2, $src3", "$src1 = $dst", + [/* For disassembly only; pattern left blank */]>; +def VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1, + (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), + IIC_VBINiQ, "vbif", "$dst, $src2, $src3", "$src1 = $dst", + [/* For disassembly only; pattern left blank */]>; + // VBIT : Vector Bitwise Insert if True // like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst", -// These are not yet implemented. The TwoAddress pass will not go looking +def VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1, + (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), + IIC_VBINiD, "vbit", "$dst, $src2, $src3", "$src1 = $dst", + [/* For disassembly only; pattern left blank */]>; +def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1, + (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), + IIC_VBINiQ, "vbit", "$dst, $src2, $src3", "$src1 = $dst", + [/* For disassembly only; pattern left blank */]>; + +// VBIT/VBIF are not yet implemented. The TwoAddress pass will not go looking // for equivalent operations with different register constraints; it just // inserts copies. diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 746caff..64142ad 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -132,6 +132,14 @@ PseudoInst<(outs), (ins i32imm:$amt), NoItinerary, [(ARMcallseq_start imm:$amt)]>, Requires<[IsThumb1Only]>; } +// The i32imm operand $val can be used by a debugger to store more information +// about the breakpoint. 
+def tBKPT : T1I<(outs), (ins i32imm:$val), NoItinerary, "bkpt\t$val", + [/* For disassembly only; pattern left blank */]>, + T1Encoding<0b101111> { + let Inst{9-8} = 0b10; +} + // For both thumb1 and thumb2. let isNotDuplicable = 1 in def tPICADD : TIt<(outs GPR:$dst), (ins GPR:$lhs, pclabel:$cp), IIC_iALUr, @@ -775,7 +783,7 @@ def tMOVCCr : T1pIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iCMOVr, "mov", "\t$dst, $rhs", []>, T1Special<{1,0,?,?}>; -def tMOVCCi : T1pIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iCMOVi, +def tMOVCCi : T1pIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iCMOVi, "mov", "\t$dst, $rhs", []>, T1General<{1,0,0,?,?}>; @@ -813,23 +821,20 @@ let isCall = 1, // except for our own input by listing the relevant registers in Defs. By // doing so, we also cause the prologue/epilogue code to actively preserve // all of the callee-saved resgisters, which is exactly what we want. +// The current SP is passed in $val, and we reuse the reg as a scratch. let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R12 ] in { - def tInt_eh_sjlj_setjmp : ThumbXI<(outs), (ins GPR:$src), + def tInt_eh_sjlj_setjmp : ThumbXI<(outs),(ins tGPR:$src, tGPR:$val), AddrModeNone, SizeSpecial, NoItinerary, - "mov\tr12, r1\t@ begin eh.setjmp\n" - "\tmov\tr1, sp\n" - "\tstr\tr1, [$src, #8]\n" - "\tadr\tr1, 0f\n" - "\tadds\tr1, #1\n" - "\tstr\tr1, [$src, #4]\n" - "\tmov\tr1, r12\n" + "str\t$val, [$src, #8]\t@ begin eh.setjmp\n" + "\tmov\t$val, pc\n" + "\tadds\t$val, #9\n" + "\tstr\t$val, [$src, #4]\n" "\tmovs\tr0, #0\n" "\tb\t1f\n" - ".align 2\n" - "0:\tmovs\tr0, #1\t@ end eh.setjmp\n" + "\tmovs\tr0, #1\t@ end eh.setjmp\n" "1:", "", - [(set R0, (ARMeh_sjlj_setjmp GPR:$src))]>; + [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>; } //===----------------------------------------------------------------------===// // Non-Instruction Patterns diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index c7591d2..55c7aa2 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -1232,7 +1232,16 @@ def t2UBFX : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width), let Inst{15} = 0; } -// FIXME: A8.6.18 BFI - Bitfield insert (Encoding T1) +// A8.6.18 BFI - Bitfield insert (Encoding T1) +// Added for disassembler with the pattern field purposely left blank. +// FIXME: Utilize this instruction in codgen. +def t2BFI : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width), + IIC_iALUi, "bfi", "\t$dst, $src, $lsb, $width", []> { + let Inst{31-27} = 0b11110; + let Inst{25} = 1; + let Inst{24-20} = 0b10110; + let Inst{15} = 0; +} defm t2ORN : T2I_bin_irs<0b0011, "orn", BinOpFrag<(or node:$LHS, (not node:$RHS))>>; @@ -1808,22 +1817,23 @@ let isCall = 1, // except for our own input by listing the relevant registers in Defs. By // doing so, we also cause the prologue/epilogue code to actively preserve // all of the callee-saved resgisters, which is exactly what we want. -let Defs = +// The current SP is passed in $val, and we reuse the reg as a scratch. 
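The eh_sjlj_setjmp variants (ARM and Thumb1 above, Thumb2 just below) fill the same two buffer slots: the resume address at offset 4 and the stack pointer at offset 8, with $val now serving as the scratch register instead of a hard-coded r12/r1. In the Thumb sequences the odd "#9" adjustment leaves the low bit of the stored address set, marking it as Thumb code. The layout implied by the store offsets, as a struct (slot 0's use is not shown in these hunks):

    #include <cstdint>

    struct SjLjSlots {
      uint32_t Slot0;      // not written by these pseudos
      uint32_t ResumePC;   // [$src, #4]: where longjmp resumes (the "1" path)
      uint32_t SavedSP;    // [$src, #8]: stack pointer at setjmp time
    };
    static_assert(sizeof(SjLjSlots) == 12, "three 32-bit slots");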
+let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15, D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30, D31 ] in { - def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins GPR:$src), + def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val), AddrModeNone, SizeSpecial, NoItinerary, - "str.w\tsp, [$src, #+8] @ eh_setjmp begin\n" - "\tadr\tr12, 0f\n" - "\torr.w\tr12, r12, #1\n" - "\tstr.w\tr12, [$src, #+4]\n" + "str\t$val, [$src, #8]\t@ begin eh.setjmp\n" + "\tmov\t$val, pc\n" + "\tadds\t$val, #9\n" + "\tstr\t$val, [$src, #4]\n" "\tmovs\tr0, #0\n" "\tb\t1f\n" - "0:\tmovs\tr0, #1 @ eh_setjmp end\n" + "\tmovs\tr0, #1\t@ end eh.setjmp\n" "1:", "", - [(set R0, (ARMeh_sjlj_setjmp GPR:$src))]>; + [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>; } diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 5bfe89d..e516593 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -114,52 +114,56 @@ def VSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb, // FP Binary Operations. // -def VADDD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b), +def VADDD : ADbI<0b11100, 0b11, 0, 0, (outs DPR:$dst), (ins DPR:$a, DPR:$b), IIC_fpALU64, "vadd", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fadd DPR:$a, DPR:$b))]>; -def VADDS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b), +def VADDS : ASbIn<0b11100, 0b11, 0, 0, (outs SPR:$dst), (ins SPR:$a, SPR:$b), IIC_fpALU32, "vadd", ".f32\t$dst, $a, $b", [(set SPR:$dst, (fadd SPR:$a, SPR:$b))]>; // These are encoded as unary instructions. let Defs = [FPSCR] in { -def VCMPED : ADuI<0b11101011, 0b0100, 0b1100, (outs), (ins DPR:$a, DPR:$b), +def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0, (outs), (ins DPR:$a, DPR:$b), IIC_fpCMP64, "vcmpe", ".f64\t$a, $b", [(arm_cmpfp DPR:$a, DPR:$b)]>; -def VCMPES : ASuI<0b11101011, 0b0100, 0b1100, (outs), (ins SPR:$a, SPR:$b), +def VCMPD : ADuI<0b11101, 0b11, 0b0100, 0b01, 0, (outs), (ins DPR:$a, DPR:$b), + IIC_fpCMP64, "vcmp", ".f64\t$a, $b", + [/* For disassembly only; pattern left blank */]>; + +def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0, (outs), (ins SPR:$a, SPR:$b), IIC_fpCMP32, "vcmpe", ".f32\t$a, $b", [(arm_cmpfp SPR:$a, SPR:$b)]>; + +def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0, (outs), (ins SPR:$a, SPR:$b), + IIC_fpCMP32, "vcmp", ".f32\t$a, $b", + [/* For disassembly only; pattern left blank */]>; } -def VDIVD : ADbI<0b11101000, (outs DPR:$dst), (ins DPR:$a, DPR:$b), +def VDIVD : ADbI<0b11101, 0b00, 0, 0, (outs DPR:$dst), (ins DPR:$a, DPR:$b), IIC_fpDIV64, "vdiv", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fdiv DPR:$a, DPR:$b))]>; -def VDIVS : ASbI<0b11101000, (outs SPR:$dst), (ins SPR:$a, SPR:$b), +def VDIVS : ASbI<0b11101, 0b00, 0, 0, (outs SPR:$dst), (ins SPR:$a, SPR:$b), IIC_fpDIV32, "vdiv", ".f32\t$dst, $a, $b", [(set SPR:$dst, (fdiv SPR:$a, SPR:$b))]>; -def VMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b), +def VMULD : ADbI<0b11100, 0b10, 0, 0, (outs DPR:$dst), (ins DPR:$a, DPR:$b), IIC_fpMUL64, "vmul", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fmul DPR:$a, DPR:$b))]>; -def VMULS : ASbIn<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b), +def VMULS : ASbIn<0b11100, 0b10, 0, 0, (outs SPR:$dst), (ins SPR:$a, SPR:$b), IIC_fpMUL32, "vmul", ".f32\t$dst, $a, $b", [(set SPR:$dst, (fmul SPR:$a, SPR:$b))]>; -def VNMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b), +def VNMULD : ADbI<0b11100, 0b10, 1, 0, 
(outs DPR:$dst), (ins DPR:$a, DPR:$b), IIC_fpMUL64, "vnmul", ".f64\t$dst, $a, $b", - [(set DPR:$dst, (fneg (fmul DPR:$a, DPR:$b)))]> { - let Inst{6} = 1; -} + [(set DPR:$dst, (fneg (fmul DPR:$a, DPR:$b)))]>; -def VNMULS : ASbI<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b), +def VNMULS : ASbI<0b11100, 0b10, 1, 0, (outs SPR:$dst), (ins SPR:$a, SPR:$b), IIC_fpMUL32, "vnmul", ".f32\t$dst, $a, $b", - [(set SPR:$dst, (fneg (fmul SPR:$a, SPR:$b)))]> { - let Inst{6} = 1; -} + [(set SPR:$dst, (fneg (fmul SPR:$a, SPR:$b)))]>; // Match reassociated forms only if not sign dependent rounding. def : Pat<(fmul (fneg DPR:$a), DPR:$b), @@ -168,41 +172,45 @@ def : Pat<(fmul (fneg SPR:$a), SPR:$b), (VNMULS SPR:$a, SPR:$b)>, Requires<[NoHonorSignDependentRounding]>; -def VSUBD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b), +def VSUBD : ADbI<0b11100, 0b11, 1, 0, (outs DPR:$dst), (ins DPR:$a, DPR:$b), IIC_fpALU64, "vsub", ".f64\t$dst, $a, $b", - [(set DPR:$dst, (fsub DPR:$a, DPR:$b))]> { - let Inst{6} = 1; -} + [(set DPR:$dst, (fsub DPR:$a, DPR:$b))]>; -def VSUBS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b), +def VSUBS : ASbIn<0b11100, 0b11, 1, 0, (outs SPR:$dst), (ins SPR:$a, SPR:$b), IIC_fpALU32, "vsub", ".f32\t$dst, $a, $b", - [(set SPR:$dst, (fsub SPR:$a, SPR:$b))]> { - let Inst{6} = 1; -} + [(set SPR:$dst, (fsub SPR:$a, SPR:$b))]>; //===----------------------------------------------------------------------===// // FP Unary Operations. // -def VABSD : ADuI<0b11101011, 0b0000, 0b1100, (outs DPR:$dst), (ins DPR:$a), +def VABSD : ADuI<0b11101, 0b11, 0b0000, 0b11, 0, (outs DPR:$dst), (ins DPR:$a), IIC_fpUNA64, "vabs", ".f64\t$dst, $a", [(set DPR:$dst, (fabs DPR:$a))]>; -def VABSS : ASuIn<0b11101011, 0b0000, 0b1100, (outs SPR:$dst), (ins SPR:$a), +def VABSS : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0,(outs SPR:$dst), (ins SPR:$a), IIC_fpUNA32, "vabs", ".f32\t$dst, $a", [(set SPR:$dst, (fabs SPR:$a))]>; let Defs = [FPSCR] in { -def VCMPEZD : ADuI<0b11101011, 0b0101, 0b1100, (outs), (ins DPR:$a), +def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0, (outs), (ins DPR:$a), IIC_fpCMP64, "vcmpe", ".f64\t$a, #0", [(arm_cmpfp0 DPR:$a)]>; -def VCMPEZS : ASuI<0b11101011, 0b0101, 0b1100, (outs), (ins SPR:$a), +def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0, (outs), (ins DPR:$a), + IIC_fpCMP64, "vcmp", ".f64\t$a, #0", + [/* For disassembly only; pattern left blank */]>; + +def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0, (outs), (ins SPR:$a), IIC_fpCMP32, "vcmpe", ".f32\t$a, #0", [(arm_cmpfp0 SPR:$a)]>; + +def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0, (outs), (ins SPR:$a), + IIC_fpCMP32, "vcmp", ".f32\t$a, #0", + [/* For disassembly only; pattern left blank */]>; } -def VCVTDS : ASuI<0b11101011, 0b0111, 0b1100, (outs DPR:$dst), (ins SPR:$a), +def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0, (outs DPR:$dst), (ins SPR:$a), IIC_fpCVTDS, "vcvt", ".f64.f32\t$dst, $a", [(set DPR:$dst, (fextend SPR:$a))]>; @@ -213,30 +221,49 @@ def VCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm, let Inst{27-23} = 0b11101; let Inst{21-16} = 0b110111; let Inst{11-8} = 0b1011; - let Inst{7-4} = 0b1100; + let Inst{7-6} = 0b11; + let Inst{4} = 0; } +// Between half-precision and single-precision. For disassembly only. 
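+// Illustrative note (not part of this patch): VCVTB operates on the bottom
+// half-word of its SPR operand and VCVTT on the top half-word, e.g.
+//   vcvtb.f32.f16 s0, s1   @ widen the low f16 of s1 to a single in s0
+//   vcvtt.f16.f32 s0, s1   @ narrow s1 into the top f16 of s0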
+ +def VCVTBSH : ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$dst), (ins SPR:$a), + /* FIXME */ IIC_fpCVTDS, "vcvtb", ".f32.f16\t$dst, $a", + [/* For disassembly only; pattern left blank */]>; + +def VCVTBHS : ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$dst), (ins SPR:$a), + /* FIXME */ IIC_fpCVTDS, "vcvtb", ".f16.f32\t$dst, $a", + [/* For disassembly only; pattern left blank */]>; + +def VCVTTSH : ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$dst), (ins SPR:$a), + /* FIXME */ IIC_fpCVTDS, "vcvtt", ".f32.f16\t$dst, $a", + [/* For disassembly only; pattern left blank */]>; + +def VCVTTHS : ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$dst), (ins SPR:$a), + /* FIXME */ IIC_fpCVTDS, "vcvtt", ".f16.f32\t$dst, $a", + [/* For disassembly only; pattern left blank */]>; + let neverHasSideEffects = 1 in { -def VMOVD: ADuI<0b11101011, 0b0000, 0b0100, (outs DPR:$dst), (ins DPR:$a), +def VMOVD: ADuI<0b11101, 0b11, 0b0000, 0b01, 0, (outs DPR:$dst), (ins DPR:$a), IIC_fpUNA64, "vmov", ".f64\t$dst, $a", []>; -def VMOVS: ASuI<0b11101011, 0b0000, 0b0100, (outs SPR:$dst), (ins SPR:$a), +def VMOVS: ASuI<0b11101, 0b11, 0b0000, 0b01, 0, (outs SPR:$dst), (ins SPR:$a), IIC_fpUNA32, "vmov", ".f32\t$dst, $a", []>; } // neverHasSideEffects -def VNEGD : ADuI<0b11101011, 0b0001, 0b0100, (outs DPR:$dst), (ins DPR:$a), +def VNEGD : ADuI<0b11101, 0b11, 0b0001, 0b01, 0, (outs DPR:$dst), (ins DPR:$a), IIC_fpUNA64, "vneg", ".f64\t$dst, $a", [(set DPR:$dst, (fneg DPR:$a))]>; -def VNEGS : ASuIn<0b11101011, 0b0001, 0b0100, (outs SPR:$dst), (ins SPR:$a), +def VNEGS : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0,(outs SPR:$dst), (ins SPR:$a), IIC_fpUNA32, "vneg", ".f32\t$dst, $a", [(set SPR:$dst, (fneg SPR:$a))]>; -def VSQRTD : ADuI<0b11101011, 0b0001, 0b1100, (outs DPR:$dst), (ins DPR:$a), +def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0, (outs DPR:$dst), (ins DPR:$a), IIC_fpSQRT64, "vsqrt", ".f64\t$dst, $a", [(set DPR:$dst, (fsqrt DPR:$a))]>; -def VSQRTS : ASuI<0b11101011, 0b0001, 0b1100, (outs SPR:$dst), (ins SPR:$a), +def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0, (outs SPR:$dst), (ins SPR:$a), IIC_fpSQRT32, "vsqrt", ".f32\t$dst, $a", [(set SPR:$dst, (fsqrt SPR:$a))]>; @@ -255,7 +282,16 @@ def VMOVSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$dst), (ins GPR:$src), def VMOVRRD : AVConv3I<0b11000101, 0b1011, (outs GPR:$wb, GPR:$dst2), (ins DPR:$src), IIC_VMOVDI, "vmov", "\t$wb, $dst2, $src", - [/* FIXME: Can't write pattern for multiple result instr*/]>; + [/* FIXME: Can't write pattern for multiple result instr*/]> { + let Inst{7-6} = 0b00; +} + +def VMOVRRS : AVConv3I<0b11000101, 0b1010, + (outs GPR:$wb, GPR:$dst2), (ins SPR:$src1, SPR:$src2), + IIC_VMOVDI, "vmov", "\t$wb, $dst2, $src1, $src2", + [/* For disassembly only; pattern left blank */]> { + let Inst{7-6} = 0b00; +} // FMDHR: GPR -> SPR // FMDLR: GPR -> SPR @@ -263,7 +299,16 @@ def VMOVRRD : AVConv3I<0b11000101, 0b1011, def VMOVDRR : AVConv5I<0b11000100, 0b1011, (outs DPR:$dst), (ins GPR:$src1, GPR:$src2), IIC_VMOVID, "vmov", "\t$dst, $src1, $src2", - [(set DPR:$dst, (arm_fmdrr GPR:$src1, GPR:$src2))]>; + [(set DPR:$dst, (arm_fmdrr GPR:$src1, GPR:$src2))]> { + let Inst{7-6} = 0b00; +} + +def VMOVSRR : AVConv5I<0b11000100, 0b1010, + (outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2), + IIC_VMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2", + [/* For disassembly only; pattern left blank */]> { + let Inst{7-6} = 0b00; +} // FMRDH: SPR -> GPR // FMRDL: SPR -> GPR @@ -277,137 +322,271 @@ def VMOVDRR : AVConv5I<0b11000100, 0b1011, // Int to FP: -def VSITOD : 
AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a), +def VSITOD : AVConv1I<0b11101, 0b11, 0b1000, 0b1011, + (outs DPR:$dst), (ins SPR:$a), IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a", [(set DPR:$dst, (arm_sitof SPR:$a))]> { - let Inst{7} = 1; + let Inst{7} = 1; // s32 } -def VSITOS : AVConv1In<0b11101011, 0b1000, 0b1010, (outs SPR:$dst),(ins SPR:$a), +def VSITOS : AVConv1In<0b11101, 0b11, 0b1000, 0b1010, + (outs SPR:$dst),(ins SPR:$a), IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a", [(set SPR:$dst, (arm_sitof SPR:$a))]> { - let Inst{7} = 1; + let Inst{7} = 1; // s32 } -def VUITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a), +def VUITOD : AVConv1I<0b11101, 0b11, 0b1000, 0b1011, + (outs DPR:$dst), (ins SPR:$a), IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a", - [(set DPR:$dst, (arm_uitof SPR:$a))]>; + [(set DPR:$dst, (arm_uitof SPR:$a))]> { + let Inst{7} = 0; // u32 +} -def VUITOS : AVConv1In<0b11101011, 0b1000, 0b1010, (outs SPR:$dst),(ins SPR:$a), +def VUITOS : AVConv1In<0b11101, 0b11, 0b1000, 0b1010, + (outs SPR:$dst), (ins SPR:$a), IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a", - [(set SPR:$dst, (arm_uitof SPR:$a))]>; + [(set SPR:$dst, (arm_uitof SPR:$a))]> { + let Inst{7} = 0; // u32 +} // FP to Int: // Always set Z bit in the instruction, i.e. "round towards zero" variants. -def VTOSIZD : AVConv1I<0b11101011, 0b1101, 0b1011, +def VTOSIZD : AVConv1I<0b11101, 0b11, 0b1101, 0b1011, (outs SPR:$dst), (ins DPR:$a), IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a", [(set SPR:$dst, (arm_ftosi DPR:$a))]> { let Inst{7} = 1; // Z bit } -def VTOSIZS : AVConv1In<0b11101011, 0b1101, 0b1010, +def VTOSIZS : AVConv1In<0b11101, 0b11, 0b1101, 0b1010, (outs SPR:$dst), (ins SPR:$a), IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a", [(set SPR:$dst, (arm_ftosi SPR:$a))]> { let Inst{7} = 1; // Z bit } -def VTOUIZD : AVConv1I<0b11101011, 0b1100, 0b1011, +def VTOUIZD : AVConv1I<0b11101, 0b11, 0b1100, 0b1011, (outs SPR:$dst), (ins DPR:$a), IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a", [(set SPR:$dst, (arm_ftoui DPR:$a))]> { let Inst{7} = 1; // Z bit } -def VTOUIZS : AVConv1In<0b11101011, 0b1100, 0b1010, +def VTOUIZS : AVConv1In<0b11101, 0b11, 0b1100, 0b1010, (outs SPR:$dst), (ins SPR:$a), IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a", [(set SPR:$dst, (arm_ftoui SPR:$a))]> { let Inst{7} = 1; // Z bit } +// And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR. +// For disassembly only. 
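+// Illustrative note (not part of this patch): with the Z bit clear these
+// assemble as VCVTR, e.g. "vcvtr.s32.f64 s0, d1", which rounds according to
+// the RMode field of FPSCR instead of the round-towards-zero behavior of the
+// plain "vcvt" variants above.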
+ +def VTOSIRD : AVConv1I<0b11101, 0b11, 0b1101, 0b1011, + (outs SPR:$dst), (ins DPR:$a), + IIC_fpCVTDI, "vcvtr", ".s32.f64\t$dst, $a", + [/* For disassembly only; pattern left blank */]> { + let Inst{7} = 0; // Z bit +} + +def VTOSIRS : AVConv1In<0b11101, 0b11, 0b1101, 0b1010, + (outs SPR:$dst), (ins SPR:$a), + IIC_fpCVTSI, "vcvtr", ".s32.f32\t$dst, $a", + [/* For disassembly only; pattern left blank */]> { + let Inst{7} = 0; // Z bit +} + +def VTOUIRD : AVConv1I<0b11101, 0b11, 0b1100, 0b1011, + (outs SPR:$dst), (ins DPR:$a), + IIC_fpCVTDI, "vcvtr", ".u32.f64\t$dst, $a", + [/* For disassembly only; pattern left blank */]> { + let Inst{7} = 0; // Z bit +} + +def VTOUIRS : AVConv1In<0b11101, 0b11, 0b1100, 0b1010, + (outs SPR:$dst), (ins SPR:$a), + IIC_fpCVTSI, "vcvtr", ".u32.f32\t$dst, $a", + [/* For disassembly only; pattern left blank */]> { + let Inst{7} = 0; // Z bit +} + +// Convert between floating-point and fixed-point +// Data type for fixed-point naming convention: +// S16 (U=0, sx=0) -> SH +// U16 (U=1, sx=0) -> UH +// S32 (U=0, sx=1) -> SL +// U32 (U=1, sx=1) -> UL + +let Constraints = "$a = $dst" in { + +// FP to Fixed-Point: + +def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0, + (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), + IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VTOUHS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 0, + (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), + IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VTOSLS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 1, + (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), + IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VTOULS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 1, + (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), + IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VTOSHD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 0, + (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), + IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VTOUHD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 0, + (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), + IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VTOSLD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 1, + (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), + IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VTOULD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 1, + (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), + IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +// Fixed-Point to FP: + +def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0, + (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), + IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VUHTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 0, + (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), + IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VSLTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 1, + (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), + IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VULTOS : 
AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 1, + (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), + IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VSHTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 0, + (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), + IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VUHTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 0, + (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), + IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VSLTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 1, + (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), + IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VULTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 1, + (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), + IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +} // End of 'let Constraints = "$a = $dst" in' + //===----------------------------------------------------------------------===// // FP FMA Operations. // -def VMLAD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), +def VMLAD : ADbI<0b11100, 0b00, 0, 0, + (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), IIC_fpMAC64, "vmla", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fadd (fmul DPR:$a, DPR:$b), DPR:$dstin))]>, RegConstraint<"$dstin = $dst">; -def VMLAS : ASbIn<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), +def VMLAS : ASbIn<0b11100, 0b00, 0, 0, + (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), IIC_fpMAC32, "vmla", ".f32\t$dst, $a, $b", [(set SPR:$dst, (fadd (fmul SPR:$a, SPR:$b), SPR:$dstin))]>, RegConstraint<"$dstin = $dst">; -def VNMLSD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), +def VNMLSD : ADbI<0b11100, 0b01, 0, 0, + (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), IIC_fpMAC64, "vnmls", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fsub (fmul DPR:$a, DPR:$b), DPR:$dstin))]>, RegConstraint<"$dstin = $dst">; -def VNMLSS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), +def VNMLSS : ASbI<0b11100, 0b01, 0, 0, + (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), IIC_fpMAC32, "vnmls", ".f32\t$dst, $a, $b", [(set SPR:$dst, (fsub (fmul SPR:$a, SPR:$b), SPR:$dstin))]>, RegConstraint<"$dstin = $dst">; -def VMLSD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), +def VMLSD : ADbI<0b11100, 0b00, 1, 0, + (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), IIC_fpMAC64, "vmls", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fadd (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>, - RegConstraint<"$dstin = $dst"> { - let Inst{6} = 1; -} + RegConstraint<"$dstin = $dst">; -def VMLSS : ASbIn<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), +def VMLSS : ASbIn<0b11100, 0b00, 1, 0, + (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), IIC_fpMAC32, "vmls", ".f32\t$dst, $a, $b", [(set SPR:$dst, (fadd (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>, - RegConstraint<"$dstin = $dst"> { - let Inst{6} = 1; -} + RegConstraint<"$dstin = $dst">; def : Pat<(fsub DPR:$dstin, (fmul DPR:$a, DPR:$b)), (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[DontUseNEONForFP]>; def : Pat<(fsub SPR:$dstin, (fmul SPR:$a, SPR:$b)), (VMLSS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>; -def VNMLAD :
ADbI<0b11100, 0b01, 1, 0, + (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), IIC_fpMAC64, "vnmla", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fsub (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>, - RegConstraint<"$dstin = $dst"> { - let Inst{6} = 1; -} + RegConstraint<"$dstin = $dst">; -def VNMLAS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), +def VNMLAS : ASbI<0b11100, 0b01, 1, 0, + (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), IIC_fpMAC32, "vnmla", ".f32\t$dst, $a, $b", [(set SPR:$dst, (fsub (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>, - RegConstraint<"$dstin = $dst"> { - let Inst{6} = 1; -} + RegConstraint<"$dstin = $dst">; //===----------------------------------------------------------------------===// // FP Conditional moves. // -def VMOVDcc : ADuI<0b11101011, 0b0000, 0b0100, +def VMOVDcc : ADuI<0b11101, 0b11, 0b0000, 0b01, 0, (outs DPR:$dst), (ins DPR:$false, DPR:$true), IIC_fpUNA64, "vmov", ".f64\t$dst, $true", [/*(set DPR:$dst, (ARMcmov DPR:$false, DPR:$true, imm:$cc))*/]>, RegConstraint<"$false = $dst">; -def VMOVScc : ASuI<0b11101011, 0b0000, 0b0100, +def VMOVScc : ASuI<0b11101, 0b11, 0b0000, 0b01, 0, (outs SPR:$dst), (ins SPR:$false, SPR:$true), IIC_fpUNA32, "vmov", ".f32\t$dst, $true", [/*(set SPR:$dst, (ARMcmov SPR:$false, SPR:$true, imm:$cc))*/]>, RegConstraint<"$false = $dst">; -def VNEGDcc : ADuI<0b11101011, 0b0001, 0b0100, +def VNEGDcc : ADuI<0b11101, 0b11, 0b0001, 0b01, 0, (outs DPR:$dst), (ins DPR:$false, DPR:$true), IIC_fpUNA64, "vneg", ".f64\t$dst, $true", [/*(set DPR:$dst, (ARMcneg DPR:$false, DPR:$true, imm:$cc))*/]>, RegConstraint<"$false = $dst">; -def VNEGScc : ASuI<0b11101011, 0b0001, 0b0100, +def VNEGScc : ASuI<0b11101, 0b11, 0b0001, 0b01, 0, (outs SPR:$dst), (ins SPR:$false, SPR:$true), IIC_fpUNA32, "vneg", ".f32\t$dst, $true", [/*(set SPR:$dst, (ARMcneg SPR:$false, SPR:$true, imm:$cc))*/]>, @@ -432,6 +611,31 @@ def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs", let Inst{4} = 1; } +// FPSCR <-> GPR (for disassembly only) + +let Uses = [FPSCR] in { +def VMRS : VFPAI<(outs GPR:$dst), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs", + "\t$dst, fpscr", + [/* For disassembly only; pattern left blank */]> { + let Inst{27-20} = 0b11101111; + let Inst{19-16} = 0b0001; + let Inst{11-8} = 0b1010; + let Inst{7} = 0; + let Inst{4} = 1; +} +} + +let Defs = [FPSCR] in { +def VMSR : VFPAI<(outs), (ins GPR:$src), VFPMiscFrm, IIC_fpSTAT, "vmsr", + "\tfpscr, $src", + [/* For disassembly only; pattern left blank */]> { + let Inst{27-20} = 0b11101110; + let Inst{19-16} = 0b0001; + let Inst{11-8} = 0b1010; + let Inst{7} = 0; + let Inst{4} = 1; +} +} // Materialize FP immediates. VFP3 only. let isReMaterializable = 1 in { diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index b78b95b..4e2d181 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -350,7 +350,8 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, : ARMRegisterInfo::getRegisterNumbering(Reg); // AM4 - register numbers in ascending order. // AM5 - consecutive register numbers in ascending order. 
- if (NewOffset == Offset + (int)Size && + if (Reg != ARM::SP && + NewOffset == Offset + (int)Size && ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) { Offset += Size; PRegNum = RegNum; diff --git a/lib/Target/ARM/ARMMCAsmInfo.cpp b/lib/Target/ARM/ARMMCAsmInfo.cpp index 3dd87c0..ccd6add 100644 --- a/lib/Target/ARM/ARMMCAsmInfo.cpp +++ b/lib/Target/ARM/ARMMCAsmInfo.cpp @@ -44,7 +44,6 @@ ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin() { AsmTransCBE = arm_asm_table; Data64bitsDirective = 0; CommentString = "@"; - COMMDirectiveTakesAlignment = false; SupportsDebugInformation = true; // Exceptions handling @@ -53,17 +52,16 @@ ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin() { } ARMELFMCAsmInfo::ARMELFMCAsmInfo() { + // ".comm align is in bytes but .align is pow-2." AlignmentIsInBytes = false; + Data64bitsDirective = 0; CommentString = "@"; - COMMDirectiveTakesAlignment = false; - - NeedsSet = false; + HasLEB128 = true; AbsoluteDebugSectionOffsets = true; PrivateGlobalPrefix = ".L"; WeakRefDirective = "\t.weak\t"; - SetDirective = "\t.set\t"; HasLCOMMDirective = true; DwarfRequiresFrameSection = false; diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h index 2176b27..c998ede 100644 --- a/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -35,11 +35,6 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// 'isThumb'. bool hasThumb2; - /// Align - required alignment. ARM functions and Thumb functions with - /// constant pools require 4-byte alignment; other Thumb functions - /// require only 2-byte alignment. - unsigned Align; - /// VarArgsRegSaveSize - Size of the register save area for vararg functions. /// unsigned VarArgsRegSaveSize; @@ -94,7 +89,6 @@ public: ARMFunctionInfo() : isThumb(false), hasThumb2(false), - Align(2U), VarArgsRegSaveSize(0), HasStackFrame(false), LRSpilledForFarJump(false), FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), @@ -105,7 +99,6 @@ public: explicit ARMFunctionInfo(MachineFunction &MF) : isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()), hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()), - Align(isThumb ? 
1U : 2U), VarArgsRegSaveSize(0), HasStackFrame(false), LRSpilledForFarJump(false), FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), @@ -118,9 +111,6 @@ public: bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; } bool isThumb2Function() const { return isThumb && hasThumb2; } - unsigned getAlign() const { return Align; } - void setAlign(unsigned a) { Align = a; } - unsigned getVarArgsRegSaveSize() const { return VarArgsRegSaveSize; } void setVarArgsRegSaveSize(unsigned s) { VarArgsRegSaveSize = s; } diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index d393e8d..0d4200c 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -123,8 +123,8 @@ def FPSCR : ARMReg<1, "fpscr">; // r10 == Stack Limit // def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, - R7, R8, R9, R10, R12, R11, - LR, SP, PC]> { + R7, R8, R9, R10, R11, R12, + SP, LR, PC]> { let MethodProtos = [{ iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 71f3883..426862c 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -122,9 +122,9 @@ ARMSubtarget::GVIsIndirectSymbol(GlobalValue *GV, Reloc::Model RelocM) const { if (RelocM == Reloc::Static) return false; - // GV with ghost linkage (in JIT lazy compilation mode) do not require an - // extra load from stub. - bool isDecl = GV->isDeclaration() && !GV->hasNotBeenReadFromBitcode(); + // Materializable GVs (in JIT lazy compilation mode) do not require an extra + // load from stub. + bool isDecl = GV->isDeclaration() && !GV->isMaterializable(); if (!isTargetDarwin()) { // Extra load is needed for all externally visible. diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 4d20a5c..7233f5c 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -133,18 +133,6 @@ bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM, bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - MachineCodeEmitter &MCE) { - // FIXME: Move this to TargetJITInfo! - if (DefRelocModel == Reloc::Default) - setRelocationModel(Reloc::Static); - - // Machine code emitter pass for ARM. - PM.add(createARMCodeEmitterPass(*this, MCE)); - return false; -} - -bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, JITCodeEmitter &JCE) { // FIXME: Move this to TargetJITInfo! if (DefRelocModel == Reloc::Default) @@ -154,40 +142,3 @@ bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM, PM.add(createARMJITCodeEmitterPass(*this, JCE)); return false; } - -bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - ObjectCodeEmitter &OCE) { - // FIXME: Move this to TargetJITInfo! - if (DefRelocModel == Reloc::Default) - setRelocationModel(Reloc::Static); - - // Machine code emitter pass for ARM. - PM.add(createARMObjectCodeEmitterPass(*this, OCE)); - return false; -} - -bool ARMBaseTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - MachineCodeEmitter &MCE) { - // Machine code emitter pass for ARM. 
- PM.add(createARMCodeEmitterPass(*this, MCE)); - return false; -} - -bool ARMBaseTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - JITCodeEmitter &JCE) { - // Machine code emitter pass for ARM. - PM.add(createARMJITCodeEmitterPass(*this, JCE)); - return false; -} - -bool ARMBaseTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - ObjectCodeEmitter &OCE) { - // Machine code emitter pass for ARM. - PM.add(createARMObjectCodeEmitterPass(*this, OCE)); - return false; -} - diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index dd9542e..88e67e3 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -53,20 +53,7 @@ public: virtual bool addPreSched2(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - MachineCodeEmitter &MCE); - virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, JITCodeEmitter &MCE); - virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - ObjectCodeEmitter &OCE); - virtual bool addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - MachineCodeEmitter &MCE); - virtual bool addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - JITCodeEmitter &MCE); - virtual bool addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - ObjectCodeEmitter &OCE); }; /// ARMTargetMachine - ARM target machine. diff --git a/lib/Target/ARM/ARMTargetObjectFile.h b/lib/Target/ARM/ARMTargetObjectFile.h index 9703403..a488c0a 100644 --- a/lib/Target/ARM/ARMTargetObjectFile.h +++ b/lib/Target/ARM/ARMTargetObjectFile.h @@ -10,7 +10,7 @@ #ifndef LLVM_TARGET_ARM_TARGETOBJECTFILE_H #define LLVM_TARGET_ARM_TARGETOBJECTFILE_H -#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/MC/MCSectionELF.h" namespace llvm { @@ -24,7 +24,7 @@ namespace llvm { if (TM.getSubtarget<ARMSubtarget>().isAAPCS_ABI()) { StaticCtorSection = - getELFSection(".init_array", MCSectionELF::SHT_INIT_ARRAY, + getELFSection(".init_array", MCSectionELF::SHT_INIT_ARRAY, MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, SectionKind::getDataRel()); StaticDtorSection = diff --git a/lib/Target/ARM/AsmParser/Makefile b/lib/Target/ARM/AsmParser/Makefile index 4fb8564..97e5612 100644 --- a/lib/Target/ARM/AsmParser/Makefile +++ b/lib/Target/ARM/AsmParser/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMARMAsmParser -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' ARM target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
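For readability, here is the one code-emitter hook that survives the ARMTargetMachine.cpp deletions above, reassembled from the kept hunk lines (a sketch of the post-patch state, not code added by this patch):

bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM,
                                          CodeGenOpt::Level OptLevel,
                                          JITCodeEmitter &JCE) {
  // FIXME: Move this to TargetJITInfo!
  if (DefRelocModel == Reloc::Default)
    setRelocationModel(Reloc::Static);

  // Machine code emitter pass for ARM.
  PM.add(createARMJITCodeEmitterPass(*this, JCE));
  return false;
}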
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index e1f386e..f60cc33 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -30,6 +30,7 @@ #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInst.h" @@ -37,13 +38,11 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegistry.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" -#include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSet.h" #include "llvm/Support/CommandLine.h" @@ -53,8 +52,6 @@ #include <cctype> using namespace llvm; -STATISTIC(EmittedInsts, "Number of machine instrs printed"); - static cl::opt<bool> EnableMCInst("enable-arm-mcinst-printer", cl::Hidden, cl::desc("enable experimental asmprinter gunk in the arm backend")); @@ -76,8 +73,9 @@ namespace { public: explicit ARMAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, - const MCAsmInfo *T, bool V) - : AsmPrinter(O, TM, T, V), AFI(NULL), MCP(NULL) { + MCContext &Ctx, MCStreamer &Streamer, + const MCAsmInfo *T) + : AsmPrinter(O, TM, Ctx, Streamer, T), AFI(NULL), MCP(NULL) { Subtarget = &TM.getSubtarget<ARMSubtarget>(); } @@ -85,10 +83,6 @@ namespace { return "ARM Assembly Printer"; } - void printMCInst(const MCInst *MI) { - ARMInstPrinter(O, *MAI, VerboseAsm).printInstruction(MI); - } - void printInstructionThroughMCStreamer(const MachineInstr *MI); @@ -162,11 +156,18 @@ namespace { void printInstruction(const MachineInstr *MI); // autogenerated. static const char *getRegisterName(unsigned RegNo); - void printMachineInstruction(const MachineInstr *MI); + virtual void EmitInstruction(const MachineInstr *MI); bool runOnMachineFunction(MachineFunction &F); + + virtual void EmitConstantPool() {} // we emit constant pools ourselves! + virtual void EmitFunctionEntryLabel(); void EmitStartOfAsmFile(Module &M); void EmitEndOfAsmFile(Module &M); + MCSymbol *GetARMSetPICJumpTableLabel2(unsigned uid, unsigned uid2, + const MachineBasicBlock *MBB) const; + MCSymbol *GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const; + /// EmitMachineConstantPoolValue - Print a machine constantpool value to /// the .s file. virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { @@ -199,7 +200,7 @@ namespace { MachineModuleInfoMachO &MMIMachO = MMI->getObjFileInfo<MachineModuleInfoMachO>(); - const MCSymbol *&StubSym = + MCSymbol *&StubSym = GV->hasHiddenVisibility() ? MMIMachO.getHiddenGVStubEntry(Sym) : MMIMachO.getGVStubEntry(Sym); if (StubSym == 0) @@ -219,7 +220,7 @@ namespace { O << "-."; O << ')'; } - O << '\n'; + OutStreamer.AddBlankLine(); } void getAnalysisUsage(AnalysisUsage &AU) const { @@ -233,97 +234,26 @@ namespace { #include "ARMGenAsmWriter.inc" -/// runOnMachineFunction - This uses the printInstruction() -/// method to print assembly for each instruction.
-/// -bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { - this->MF = &MF; - - AFI = MF.getInfo<ARMFunctionInfo>(); - MCP = MF.getConstantPool(); - - SetupMachineFunction(MF); - O << "\n"; - - // NOTE: we don't print out constant pools here, they are handled as - // instructions. - - O << '\n'; - - // Print out labels for the function. - const Function *F = MF.getFunction(); - OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); - - switch (F->getLinkage()) { - default: llvm_unreachable("Unknown linkage type!"); - case Function::PrivateLinkage: - case Function::InternalLinkage: - break; - case Function::ExternalLinkage: - O << "\t.globl\t" << *CurrentFnSym << "\n"; - break; - case Function::LinkerPrivateLinkage: - case Function::WeakAnyLinkage: - case Function::WeakODRLinkage: - case Function::LinkOnceAnyLinkage: - case Function::LinkOnceODRLinkage: - if (Subtarget->isTargetDarwin()) { - O << "\t.globl\t" << *CurrentFnSym << "\n"; - O << "\t.weak_definition\t" << *CurrentFnSym << "\n"; - } else { - O << MAI->getWeakRefDirective() << *CurrentFnSym << "\n"; - } - break; - } - - printVisibility(CurrentFnSym, F->getVisibility()); - - unsigned FnAlign = 1 << MF.getAlignment(); // MF alignment is log2. +void ARMAsmPrinter::EmitFunctionEntryLabel() { if (AFI->isThumbFunction()) { - EmitAlignment(FnAlign, F, AFI->getAlign()); O << "\t.code\t16\n"; O << "\t.thumb_func"; if (Subtarget->isTargetDarwin()) - O << "\t" << *CurrentFnSym; - O << "\n"; - } else { - EmitAlignment(FnAlign, F); - } - - O << *CurrentFnSym << ":\n"; - // Emit pre-function debug information. - DW->BeginFunction(&MF); - - if (Subtarget->isTargetDarwin()) { - // If the function is empty, then we need to emit *something*. Otherwise, - // the function's label might be associated with something that it wasn't - // meant to be associated with. We emit a noop in this situation. - MachineFunction::iterator I = MF.begin(); - - if (++I == MF.end() && MF.front().empty()) - O << "\tnop\n"; - } - - // Print out code for the function. - for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); - I != E; ++I) { - // Print a label for the basic block. - if (I != MF.begin()) - EmitBasicBlockStart(I); - - // Print the assembly for the instruction. - for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); - II != E; ++II) - printMachineInstruction(II); + O << '\t' << *CurrentFnSym; + O << '\n'; } + + OutStreamer.EmitLabel(CurrentFnSym); +} - if (MAI->hasDotTypeDotSizeDirective()) - O << "\t.size " << *CurrentFnSym << ", .-" << *CurrentFnSym << "\n"; - - // Emit post-function debug information. - DW->EndFunction(&MF); +/// runOnMachineFunction - This uses the printInstruction() +/// method to print assembly for each instruction. +/// +bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { + AFI = MF.getInfo<ARMFunctionInfo>(); + MCP = MF.getConstantPool(); - return false; + return AsmPrinter::runOnMachineFunction(MF); } void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, @@ -367,7 +297,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, break; } case MachineOperand::MO_MachineBasicBlock: - O << *GetMBBSymbol(MO.getMBB()->getNumber()); + O << *MO.getMBB()->getSymbol(OutContext); return; case MachineOperand::MO_GlobalAddress: { bool isCallOp = Modifier && !strcmp(Modifier, "call"); @@ -889,7 +819,7 @@ void ARMAsmPrinter::printCPInstOperand(const MachineInstr *MI, int OpNum, // data itself. 
if (!strcmp(Modifier, "label")) { unsigned ID = MI->getOperand(OpNum).getImm(); - O << *GetCPISymbol(ID) << ":\n"; + OutStreamer.EmitLabel(GetCPISymbol(ID)); } else { assert(!strcmp(Modifier, "cpentry") && "Unknown modifier for CPE"); unsigned CPI = MI->getOperand(OpNum).getIndex(); @@ -904,6 +834,24 @@ void ARMAsmPrinter::printCPInstOperand(const MachineInstr *MI, int OpNum, } } +MCSymbol *ARMAsmPrinter:: +GetARMSetPICJumpTableLabel2(unsigned uid, unsigned uid2, + const MachineBasicBlock *MBB) const { + SmallString<60> Name; + raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() + << getFunctionNumber() << '_' << uid << '_' << uid2 + << "_set_" << MBB->getNumber(); + return OutContext.GetOrCreateSymbol(Name.str()); +} + +MCSymbol *ARMAsmPrinter:: +GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const { + SmallString<60> Name; + raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "JTI" + << getFunctionNumber() << '_' << uid << '_' << uid2; + return OutContext.GetOrCreateSymbol(Name.str()); +} + void ARMAsmPrinter::printJTBlockOperand(const MachineInstr *MI, int OpNum) { assert(!Subtarget->isThumb2() && "Thumb2 should use double-jump jumptables!"); @@ -911,36 +859,34 @@ void ARMAsmPrinter::printJTBlockOperand(const MachineInstr *MI, int OpNum) { const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id unsigned JTI = MO1.getIndex(); - O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() - << '_' << JTI << '_' << MO2.getImm() << ":\n"; + MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm()); + OutStreamer.EmitLabel(JTISymbol); const char *JTEntryDirective = MAI->getData32bitsDirective(); - const MachineFunction *MF = MI->getParent()->getParent(); const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; - bool UseSet= MAI->getSetDirective() && TM.getRelocationModel() == Reloc::PIC_; + bool UseSet= MAI->hasSetDirective() && TM.getRelocationModel() == Reloc::PIC_; SmallPtrSet<MachineBasicBlock*, 8> JTSets; for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) { MachineBasicBlock *MBB = JTBBs[i]; bool isNew = JTSets.insert(MBB); - if (UseSet && isNew) - printPICJumpTableSetLabel(JTI, MO2.getImm(), MBB); + if (UseSet && isNew) { + O << "\t.set\t" + << *GetARMSetPICJumpTableLabel2(JTI, MO2.getImm(), MBB) << ',' + << *MBB->getSymbol(OutContext) << '-' << *JTISymbol << '\n'; + } O << JTEntryDirective << ' '; if (UseSet) - O << MAI->getPrivateGlobalPrefix() << getFunctionNumber() - << '_' << JTI << '_' << MO2.getImm() - << "_set_" << MBB->getNumber(); - else if (TM.getRelocationModel() == Reloc::PIC_) { - O << *GetMBBSymbol(MBB->getNumber()) - << '-' << MAI->getPrivateGlobalPrefix() << "JTI" - << getFunctionNumber() << '_' << JTI << '_' << MO2.getImm(); - } else { - O << *GetMBBSymbol(MBB->getNumber()); - } + O << *GetARMSetPICJumpTableLabel2(JTI, MO2.getImm(), MBB); + else if (TM.getRelocationModel() == Reloc::PIC_) + O << *MBB->getSymbol(OutContext) << '-' << *JTISymbol; + else + O << *MBB->getSymbol(OutContext); + if (i != e-1) O << '\n'; } @@ -950,10 +896,10 @@ void ARMAsmPrinter::printJT2BlockOperand(const MachineInstr *MI, int OpNum) { const MachineOperand &MO1 = MI->getOperand(OpNum); const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id unsigned JTI = MO1.getIndex(); - O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() - << '_' << JTI << '_' << MO2.getImm() << ":\n"; + + 
MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm()); + OutStreamer.EmitLabel(JTISymbol); - const MachineFunction *MF = MI->getParent()->getParent(); const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; @@ -969,13 +915,12 @@ void ARMAsmPrinter::printJT2BlockOperand(const MachineInstr *MI, int OpNum) { O << MAI->getData8bitsDirective(); else if (HalfWordOffset) O << MAI->getData16bitsDirective(); - if (ByteOffset || HalfWordOffset) { - O << '(' << *GetMBBSymbol(MBB->getNumber()); - O << "-" << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() - << '_' << JTI << '_' << MO2.getImm() << ")/2"; - } else { - O << "\tb.w " << *GetMBBSymbol(MBB->getNumber()); - } + + if (ByteOffset || HalfWordOffset) + O << '(' << *MBB->getSymbol(OutContext) << "-" << *JTISymbol << ")/2"; + else + O << "\tb.w " << *MBB->getSymbol(OutContext); + if (i != e-1) O << '\n'; } @@ -1076,12 +1021,7 @@ bool ARMAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, return false; } -void ARMAsmPrinter::printMachineInstruction(const MachineInstr *MI) { - ++EmittedInsts; - - // Call the autogenerated instruction printer routines. - processDebugLoc(MI, true); - +void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { if (EnableMCInst) { printInstructionThroughMCStreamer(MI); } else { @@ -1090,12 +1030,8 @@ void ARMAsmPrinter::printMachineInstruction(const MachineInstr *MI) { EmitAlignment(2); printInstruction(MI); + OutStreamer.AddBlankLine(); } - - if (VerboseAsm) - EmitComments(*MI); - O << '\n'; - processDebugLoc(MI, false); } void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { @@ -1215,20 +1151,6 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { case ARM::t2MOVi32imm: assert(0 && "Should be lowered by thumb2it pass"); default: break; - case TargetInstrInfo::DBG_LABEL: - case TargetInstrInfo::EH_LABEL: - case TargetInstrInfo::GC_LABEL: - printLabel(MI); - return; - case TargetInstrInfo::KILL: - printKill(MI); - return; - case TargetInstrInfo::INLINEASM: - printInlineAsm(MI); - return; - case TargetInstrInfo::IMPLICIT_DEF: - printImplicitDef(MI); - return; case ARM::PICADD: { // FIXME: Remove asm string from td file. // This is a pseudo op for a label + instruction sequence, which looks like: // LPC0: @@ -1250,7 +1172,7 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); AddInst.addOperand(MCOperand::CreateReg(ARM::PC)); AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg())); - printMCInst(&AddInst); + OutStreamer.EmitInstruction(AddInst); return; } case ARM::CONSTPOOL_ENTRY: { // FIXME: Remove asm string from td file. 
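For concreteness, the two jump-table label helpers added to ARMAsmPrinter.cpp above produce names like the following (assuming the ELF ".L" private prefix set in ARMMCAsmInfo.cpp earlier; Darwin uses plain "L"; the function number, uids, and block number here are hypothetical):

  GetARMJTIPICJumpTableLabel2(1, 0)          -> .LJTI3_1_0     (in function #3)
  GetARMSetPICJumpTableLabel2(1, 0, MBB #7)  -> .L3_1_0_set_7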
@@ -1291,8 +1213,7 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg())); TmpInst.addOperand(MCOperand::CreateReg(0)); // cc_out - printMCInst(&TmpInst); - O << '\n'; + OutStreamer.EmitInstruction(TmpInst); } { @@ -1306,7 +1227,7 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg())); TmpInst.addOperand(MCOperand::CreateReg(0)); // cc_out - printMCInst(&TmpInst); + OutStreamer.EmitInstruction(TmpInst); } return; } @@ -1325,8 +1246,7 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm())); TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg())); - printMCInst(&TmpInst); - O << '\n'; + OutStreamer.EmitInstruction(TmpInst); } { @@ -1340,7 +1260,7 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm())); TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg())); - printMCInst(&TmpInst); + OutStreamer.EmitInstruction(TmpInst); } return; @@ -1349,8 +1269,7 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { MCInst TmpInst; MCInstLowering.Lower(MI, TmpInst); - - printMCInst(&TmpInst); + OutStreamer.EmitInstruction(TmpInst); } //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp index 6885ecb..d7d8e09 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp @@ -24,7 +24,6 @@ using namespace llvm; // Include the auto-generated portion of the assembly writer. #define MachineInstr MCInst #define ARMAsmPrinter ARMInstPrinter // FIXME: REMOVE. -#define NO_ASM_WRITER_BOILERPLATE #include "ARMGenAsmWriter.inc" #undef MachineInstr #undef ARMAsmPrinter @@ -353,6 +352,5 @@ void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum) { } void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum) { - // FIXME: remove this. - abort(); + O << "#" << MI->getOperand(OpNum).getImm() * 4; } diff --git a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp b/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp index f843ee2..1b2dd48 100644 --- a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp @@ -135,7 +135,7 @@ void ARMMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { break; case MachineOperand::MO_MachineBasicBlock: MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create( - Printer.GetMBBSymbol(MO.getMBB()->getNumber()), Ctx)); + MO.getMBB()->getSymbol(Ctx), Ctx)); break; case MachineOperand::MO_GlobalAddress: MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO)); diff --git a/lib/Target/ARM/AsmPrinter/Makefile b/lib/Target/ARM/AsmPrinter/Makefile index 93b8fc9..208becc 100644 --- a/lib/Target/ARM/AsmPrinter/Makefile +++ b/lib/Target/ARM/AsmPrinter/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMARMAsmPrinter -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' arm target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
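A quick sanity check on the printThumbS4ImmOperand change above (the operand value is hypothetical, not from the patch): these Thumb immediates are stored pre-divided by 4, so an MCOperand immediate of 2 now prints as "#8":

  // MI->getOperand(OpNum).getImm() == 2
  O << "#" << MI->getOperand(OpNum).getImm() * 4;   // emits "#8"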
diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile index b766a86..a8dd38c 100644 --- a/lib/Target/ARM/Makefile +++ b/lib/Target/ARM/Makefile @@ -10,7 +10,6 @@ LEVEL = ../../.. LIBRARYNAME = LLVMARMCodeGen TARGET = ARM -CXXFLAGS = -fno-rtti # Make sure that tblgen is run, first thing. BUILT_SOURCES = ARMGenRegisterInfo.h.inc ARMGenRegisterNames.inc \ diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt index a6f26a5..9efb5a1 100644 --- a/lib/Target/ARM/README.txt +++ b/lib/Target/ARM/README.txt @@ -71,26 +71,6 @@ were disabled due to badness with the ARM carry flag on subtracts. //===---------------------------------------------------------------------===// -We currently compile abs: -int foo(int p) { return p < 0 ? -p : p; } - -into: - -_foo: - rsb r1, r0, #0 - cmn r0, #1 - movgt r1, r0 - mov r0, r1 - bx lr - -This is very, uh, literal. This could be a 3 operation sequence: - t = (p sra 31); - res = (p xor t)-t - -Which would be better. This occurs in png decode. - -//===---------------------------------------------------------------------===// - More load / store optimizations: 1) Better representation for block transfer? This is from Olden/power: diff --git a/lib/Target/ARM/TargetInfo/Makefile b/lib/Target/ARM/TargetInfo/Makefile index 589dbe5..6292ab1 100644 --- a/lib/Target/ARM/TargetInfo/Makefile +++ b/lib/Target/ARM/TargetInfo/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMARMInfo -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index 387edaf..20f13f1 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -382,8 +382,8 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal); } else { - // AddrMode4 cannot handle any offset. - if (AddrMode == ARMII::AddrMode4) + // AddrMode4 and AddrMode6 cannot handle any offset. + if (AddrMode == ARMII::AddrMode4 || AddrMode == ARMII::AddrMode6) return false; // AddrModeT2_so cannot handle any offset. If there is no offset @@ -418,15 +418,12 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, NewOpc = positiveOffsetOpcode(Opcode); NumBits = 12; } - } else { - // VFP and NEON address modes. - int InstrOffs = 0; - if (AddrMode == ARMII::AddrMode5) { - const MachineOperand &OffOp = MI.getOperand(FrameRegIdx+1); - InstrOffs = ARM_AM::getAM5Offset(OffOp.getImm()); - if (ARM_AM::getAM5Op(OffOp.getImm()) == ARM_AM::sub) - InstrOffs *= -1; - } + } else if (AddrMode == ARMII::AddrMode5) { + // VFP address mode. 
+ const MachineOperand &OffOp = MI.getOperand(FrameRegIdx+1); + int InstrOffs = ARM_AM::getAM5Offset(OffOp.getImm()); + if (ARM_AM::getAM5Op(OffOp.getImm()) == ARM_AM::sub) + InstrOffs *= -1; NumBits = 8; Scale = 4; Offset += InstrOffs * 4; @@ -435,6 +432,8 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, Offset = -Offset; isSub = true; } + } else { + llvm_unreachable("Unsupported addressing mode!"); } if (NewOpc != Opcode) diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index 95288bf..5086eff 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -83,7 +83,7 @@ namespace { // FIXME: Do we need the 16-bit 'S' variant? { ARM::t2MOVr,ARM::tMOVgpr2gpr,0, 0, 0, 0, 0, 1,0, 0 }, { ARM::t2MOVCCr,0, ARM::tMOVCCr, 0, 0, 0, 0, 0,1, 0 }, - { ARM::t2MOVCCi,0, ARM::tMOVCCi, 0, 8, 0, 0, 0,1, 0 }, + { ARM::t2MOVCCi,0, ARM::tMOVCCi, 0, 8, 0, 1, 0,1, 0 }, { ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 0 }, { ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0 }, { ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 0 }, diff --git a/lib/Target/Alpha/Alpha.h b/lib/Target/Alpha/Alpha.h index b8a0645..5cf4866 100644 --- a/lib/Target/Alpha/Alpha.h +++ b/lib/Target/Alpha/Alpha.h @@ -21,18 +21,12 @@ namespace llvm { class AlphaTargetMachine; class FunctionPass; - class MachineCodeEmitter; - class ObjectCodeEmitter; class formatted_raw_ostream; FunctionPass *createAlphaISelDag(AlphaTargetMachine &TM); FunctionPass *createAlphaPatternInstructionSelector(TargetMachine &TM); - FunctionPass *createAlphaCodeEmitterPass(AlphaTargetMachine &TM, - MachineCodeEmitter &MCE); FunctionPass *createAlphaJITCodeEmitterPass(AlphaTargetMachine &TM, JITCodeEmitter &JCE); - FunctionPass *createAlphaObjectCodeEmitterPass(AlphaTargetMachine &TM, - ObjectCodeEmitter &OCE); FunctionPass *createAlphaLLRPPass(AlphaTargetMachine &tm); FunctionPass *createAlphaBranchSelectionPass(); diff --git a/lib/Target/Alpha/AlphaCodeEmitter.cpp b/lib/Target/Alpha/AlphaCodeEmitter.cpp index b090f0d..eb5e429 100644 --- a/lib/Target/Alpha/AlphaCodeEmitter.cpp +++ b/lib/Target/Alpha/AlphaCodeEmitter.cpp @@ -17,9 +17,7 @@ #include "AlphaRelocations.h" #include "Alpha.h" #include "llvm/PassManager.h" -#include "llvm/CodeGen/MachineCodeEmitter.h" #include "llvm/CodeGen/JITCodeEmitter.h" -#include "llvm/CodeGen/ObjectCodeEmitter.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/Passes.h" @@ -30,11 +28,14 @@ using namespace llvm; namespace { - - class AlphaCodeEmitter { - MachineCodeEmitter &MCE; + class AlphaCodeEmitter : public MachineFunctionPass { + JITCodeEmitter &MCE; + const AlphaInstrInfo *II; public: - AlphaCodeEmitter(MachineCodeEmitter &mce) : MCE(mce) {} + static char ID; + + AlphaCodeEmitter(JITCodeEmitter &mce) : MachineFunctionPass(&ID), + MCE(mce) {} /// getBinaryCodeForInstr - This function, generated by the /// CodeEmitterGenerator using TableGen, produces the binary encoding for @@ -46,57 +47,30 @@ namespace { unsigned getMachineOpValue(const MachineInstr &MI, const MachineOperand &MO); - }; - - template <class CodeEmitter> - class Emitter : public MachineFunctionPass, public AlphaCodeEmitter - { - const AlphaInstrInfo *II; - TargetMachine &TM; - CodeEmitter &MCE; - - public: - static char ID; - explicit Emitter(TargetMachine &tm, CodeEmitter &mce) - : MachineFunctionPass(&ID), AlphaCodeEmitter(mce), - II(0), TM(tm), MCE(mce) {} - Emitter(TargetMachine &tm, CodeEmitter &mce, const 
AlphaInstrInfo& ii) - : MachineFunctionPass(&ID), AlphaCodeEmitter(mce), - II(&ii), TM(tm), MCE(mce) {} - + bool runOnMachineFunction(MachineFunction &MF); - + virtual const char *getPassName() const { return "Alpha Machine Code Emitter"; } - + private: void emitBasicBlock(MachineBasicBlock &MBB); }; - - template <class CodeEmitter> - char Emitter<CodeEmitter>::ID = 0; } +char AlphaCodeEmitter::ID = 0; + + /// createAlphaCodeEmitterPass - Return a pass that emits the collected Alpha /// code to the specified MCE object. -FunctionPass *llvm::createAlphaCodeEmitterPass(AlphaTargetMachine &TM, - MachineCodeEmitter &MCE) { - return new Emitter<MachineCodeEmitter>(TM, MCE); -} - FunctionPass *llvm::createAlphaJITCodeEmitterPass(AlphaTargetMachine &TM, JITCodeEmitter &JCE) { - return new Emitter<JITCodeEmitter>(TM, JCE); -} -FunctionPass *llvm::createAlphaObjectCodeEmitterPass(AlphaTargetMachine &TM, - ObjectCodeEmitter &OCE) { - return new Emitter<ObjectCodeEmitter>(TM, OCE); + return new AlphaCodeEmitter(JCE); } -template <class CodeEmitter> -bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) { +bool AlphaCodeEmitter::runOnMachineFunction(MachineFunction &MF) { II = ((AlphaTargetMachine&)MF.getTarget()).getInstrInfo(); do { @@ -108,8 +82,7 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) { return false; } -template <class CodeEmitter> -void Emitter<CodeEmitter>::emitBasicBlock(MachineBasicBlock &MBB) { +void AlphaCodeEmitter::emitBasicBlock(MachineBasicBlock &MBB) { MCE.StartMachineBasicBlock(&MBB); for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) { @@ -122,8 +95,8 @@ void Emitter<CodeEmitter>::emitBasicBlock(MachineBasicBlock &MBB) { case Alpha::ALTENT: case Alpha::PCLABEL: case Alpha::MEMLABEL: - case TargetInstrInfo::IMPLICIT_DEF: - case TargetInstrInfo::KILL: + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::KILL: break; //skip these } MCE.processDebugLoc(MI.getDebugLoc(), false); diff --git a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp index eaefef9..d6b17c2 100644 --- a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp +++ b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp @@ -64,7 +64,7 @@ namespace { /// that the bits 1-7 of LHS are already zero. If LHS is non-null, we are /// in checking mode. If LHS is null, we assume that the mask has already /// been validated before. 
- uint64_t get_zapImm(SDValue LHS, uint64_t Constant) { + uint64_t get_zapImm(SDValue LHS, uint64_t Constant) const { uint64_t BitsToCheck = 0; unsigned Result = 0; for (unsigned i = 0; i != 8; ++i) { diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp index 471de7f..5d8310e 100644 --- a/lib/Target/Alpha/AlphaISelLowering.cpp +++ b/lib/Target/Alpha/AlphaISelLowering.cpp @@ -21,7 +21,7 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" -#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/Constants.h" #include "llvm/Function.h" #include "llvm/Module.h" @@ -49,8 +49,6 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM) setShiftAmountType(MVT::i64); setBooleanContents(ZeroOrOneBooleanContent); - setUsesGlobalOffsetTable(true); - addRegisterClass(MVT::i64, Alpha::GPRCRegisterClass); addRegisterClass(MVT::f64, Alpha::F8RCRegisterClass); addRegisterClass(MVT::f32, Alpha::F4RCRegisterClass); @@ -223,11 +221,13 @@ static SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) { SDValue AlphaTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, + bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) { + // Alpha target does not yet support tail call optimization. + isTailCall = false; // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ArgLocs; @@ -282,7 +282,8 @@ AlphaTargetLowering::LowerCall(SDValue Chain, SDValue Callee, DAG.getIntPtrConstant(VA.getLocMemOffset())); MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, - PseudoSourceValue::getStack(), 0)); + PseudoSourceValue::getStack(), 0, + false, false, 0)); } } @@ -426,7 +427,8 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain, // Create the SelectionDAG nodes corresponding to a load //from this parameter SDValue FIN = DAG.getFrameIndex(FI, MVT::i64); - ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0); + ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0, + false, false, 0); } InVals.push_back(ArgVal); } @@ -442,14 +444,16 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain, int FI = MFI->CreateFixedObject(8, -8 * (6 - i), true, false); if (i == 0) VarArgsBase = FI; SDValue SDFI = DAG.getFrameIndex(FI, MVT::i64); - LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0)); + LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0, + false, false, 0)); if (TargetRegisterInfo::isPhysicalRegister(args_float[i])) args_float[i] = AddLiveIn(MF, args_float[i], &Alpha::F8RCRegClass); argt = DAG.getCopyFromReg(Chain, dl, args_float[i], MVT::f64); FI = MFI->CreateFixedObject(8, - 8 * (12 - i), true, false); SDFI = DAG.getFrameIndex(FI, MVT::i64); - LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0)); + LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0, + false, false, 0)); } //Set up a token factor with all the stack traffic @@ -528,11 +532,12 @@ void AlphaTargetLowering::LowerVAARG(SDNode *N, SDValue &Chain, const Value *VAListS = cast<SrcValueSDNode>(N->getOperand(2))->getValue(); DebugLoc dl = N->getDebugLoc(); - SDValue Base = DAG.getLoad(MVT::i64, dl, Chain, VAListP, VAListS, 0); + SDValue Base = DAG.getLoad(MVT::i64, dl, Chain, VAListP, VAListS, 0, + false, false, 0); SDValue 
Tmp = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP, DAG.getConstant(8, MVT::i64)); SDValue Offset = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Base.getValue(1), - Tmp, NULL, 0, MVT::i32); + Tmp, NULL, 0, MVT::i32, false, false, 0); DataPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Base, Offset); if (N->getValueType(0).isFloatingPoint()) { @@ -547,7 +552,7 @@ void AlphaTargetLowering::LowerVAARG(SDNode *N, SDValue &Chain, SDValue NewOffset = DAG.getNode(ISD::ADD, dl, MVT::i64, Offset, DAG.getConstant(8, MVT::i64)); Chain = DAG.getTruncStore(Offset.getValue(1), dl, NewOffset, Tmp, NULL, 0, - MVT::i32); + MVT::i32, false, false, 0); } /// LowerOperation - Provide custom lowering hooks for some operations. @@ -694,9 +699,10 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { SDValue Result; if (Op.getValueType() == MVT::i32) Result = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Chain, DataPtr, - NULL, 0, MVT::i32); + NULL, 0, MVT::i32, false, false, 0); else - Result = DAG.getLoad(Op.getValueType(), dl, Chain, DataPtr, NULL, 0); + Result = DAG.getLoad(Op.getValueType(), dl, Chain, DataPtr, NULL, 0, + false, false, 0); return Result; } case ISD::VACOPY: { @@ -706,15 +712,18 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { const Value *DestS = cast<SrcValueSDNode>(Op.getOperand(3))->getValue(); const Value *SrcS = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); - SDValue Val = DAG.getLoad(getPointerTy(), dl, Chain, SrcP, SrcS, 0); - SDValue Result = DAG.getStore(Val.getValue(1), dl, Val, DestP, DestS, 0); + SDValue Val = DAG.getLoad(getPointerTy(), dl, Chain, SrcP, SrcS, 0, + false, false, 0); + SDValue Result = DAG.getStore(Val.getValue(1), dl, Val, DestP, DestS, 0, + false, false, 0); SDValue NP = DAG.getNode(ISD::ADD, dl, MVT::i64, SrcP, DAG.getConstant(8, MVT::i64)); Val = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Result, - NP, NULL,0, MVT::i32); + NP, NULL,0, MVT::i32, false, false, 0); SDValue NPD = DAG.getNode(ISD::ADD, dl, MVT::i64, DestP, DAG.getConstant(8, MVT::i64)); - return DAG.getTruncStore(Val.getValue(1), dl, Val, NPD, NULL, 0, MVT::i32); + return DAG.getTruncStore(Val.getValue(1), dl, Val, NPD, NULL, 0, MVT::i32, + false, false, 0); } case ISD::VASTART: { SDValue Chain = Op.getOperand(0); @@ -723,11 +732,12 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { // vastart stores the address of the VarArgsBase and VarArgsOffset SDValue FR = DAG.getFrameIndex(VarArgsBase, MVT::i64); - SDValue S1 = DAG.getStore(Chain, dl, FR, VAListP, VAListS, 0); + SDValue S1 = DAG.getStore(Chain, dl, FR, VAListP, VAListS, 0, + false, false, 0); SDValue SA2 = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP, DAG.getConstant(8, MVT::i64)); return DAG.getTruncStore(S1, dl, DAG.getConstant(VarArgsOffset, MVT::i64), - SA2, NULL, 0, MVT::i32); + SA2, NULL, 0, MVT::i32, false, false, 0); } case ISD::RETURNADDR: return DAG.getNode(AlphaISD::GlobalRetAddr, DebugLoc::getUnknownLoc(), @@ -749,7 +759,8 @@ void AlphaTargetLowering::ReplaceNodeResults(SDNode *N, SDValue Chain, DataPtr; LowerVAARG(N, Chain, DataPtr, DAG); - SDValue Res = DAG.getLoad(N->getValueType(0), dl, Chain, DataPtr, NULL, 0); + SDValue Res = DAG.getLoad(N->getValueType(0), dl, Chain, DataPtr, NULL, 0, + false, false, 0); Results.push_back(Res); Results.push_back(SDValue(Res.getNode(), 1)); } diff --git a/lib/Target/Alpha/AlphaISelLowering.h b/lib/Target/Alpha/AlphaISelLowering.h index b204faf..0f17025 100644 --- a/lib/Target/Alpha/AlphaISelLowering.h +++ 
b/lib/Target/Alpha/AlphaISelLowering.h @@ -121,7 +121,7 @@ namespace llvm { virtual SDValue LowerCall(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, bool isTailCall, + CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, diff --git a/lib/Target/Alpha/AlphaMCAsmInfo.cpp b/lib/Target/Alpha/AlphaMCAsmInfo.cpp index b652a53..c67c6a2 100644 --- a/lib/Target/Alpha/AlphaMCAsmInfo.cpp +++ b/lib/Target/Alpha/AlphaMCAsmInfo.cpp @@ -17,6 +17,7 @@ using namespace llvm; AlphaMCAsmInfo::AlphaMCAsmInfo(const Target &T, const StringRef &TT) { AlignmentIsInBytes = false; PrivateGlobalPrefix = "$"; - PICJumpTableDirective = ".gprel32"; + GPRel32Directive = ".gprel32"; WeakRefDirective = "\t.weak\t"; + HasSetDirective = false; } diff --git a/lib/Target/Alpha/AlphaRegisterInfo.cpp b/lib/Target/Alpha/AlphaRegisterInfo.cpp index 64bdd62..ba662fb 100644 --- a/lib/Target/Alpha/AlphaRegisterInfo.cpp +++ b/lib/Target/Alpha/AlphaRegisterInfo.cpp @@ -251,7 +251,7 @@ void AlphaRegisterInfo::emitPrologue(MachineFunction &MF) const { } else { std::string msg; raw_string_ostream Msg(msg); - Msg << "Too big a stack frame at " + NumBytes; + Msg << "Too big a stack frame at " << NumBytes; llvm_report_error(Msg.str()); } @@ -303,15 +303,14 @@ void AlphaRegisterInfo::emitEpilogue(MachineFunction &MF, } else { std::string msg; raw_string_ostream Msg(msg); - Msg << "Too big a stack frame at " + NumBytes; + Msg << "Too big a stack frame at " << NumBytes; llvm_report_error(Msg.str()); } } } unsigned AlphaRegisterInfo::getRARegister() const { - llvm_unreachable("What is the return address register"); - return 0; + return Alpha::R26; } unsigned AlphaRegisterInfo::getFrameRegister(const MachineFunction &MF) const { diff --git a/lib/Target/Alpha/AlphaTargetMachine.cpp b/lib/Target/Alpha/AlphaTargetMachine.cpp index d0d5a43..5169a01 100644 --- a/lib/Target/Alpha/AlphaTargetMachine.cpp +++ b/lib/Target/Alpha/AlphaTargetMachine.cpp @@ -55,35 +55,7 @@ bool AlphaTargetMachine::addPreEmitPass(PassManagerBase &PM, } bool AlphaTargetMachine::addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - MachineCodeEmitter &MCE) { - PM.add(createAlphaCodeEmitterPass(*this, MCE)); - return false; -} -bool AlphaTargetMachine::addCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, JITCodeEmitter &JCE) { PM.add(createAlphaJITCodeEmitterPass(*this, JCE)); return false; } -bool AlphaTargetMachine::addCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - ObjectCodeEmitter &OCE) { - PM.add(createAlphaObjectCodeEmitterPass(*this, OCE)); - return false; -} -bool AlphaTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - MachineCodeEmitter &MCE) { - return addCodeEmitter(PM, OptLevel, MCE); -} -bool AlphaTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - JITCodeEmitter &JCE) { - return addCodeEmitter(PM, OptLevel, JCE); -} -bool AlphaTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - ObjectCodeEmitter &OCE) { - return addCodeEmitter(PM, OptLevel, OCE); -} - diff --git a/lib/Target/Alpha/AlphaTargetMachine.h b/lib/Target/Alpha/AlphaTargetMachine.h index f03e938..6f3a774 100644 --- a/lib/Target/Alpha/AlphaTargetMachine.h +++ b/lib/Target/Alpha/AlphaTargetMachine.h @@ -56,20 +56,7 @@ public: virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); 
virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - MachineCodeEmitter &MCE); - virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, JITCodeEmitter &JCE); - virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - ObjectCodeEmitter &JCE); - virtual bool addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - MachineCodeEmitter &MCE); - virtual bool addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - JITCodeEmitter &JCE); - virtual bool addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - ObjectCodeEmitter &OCE); }; } // end namespace llvm diff --git a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp index b13f544..733a46c 100644 --- a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp +++ b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp @@ -29,30 +29,33 @@ #include "llvm/Target/TargetRegistry.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; -STATISTIC(EmittedInsts, "Number of machine instrs printed"); - namespace { struct AlphaAsmPrinter : public AsmPrinter { /// Unique incrementer for label values for referencing Global values. /// explicit AlphaAsmPrinter(formatted_raw_ostream &o, TargetMachine &tm, - const MCAsmInfo *T, bool V) - : AsmPrinter(o, tm, T, V) {} + MCContext &Ctx, MCStreamer &Streamer, + const MCAsmInfo *T) + : AsmPrinter(o, tm, Ctx, Streamer, T) {} virtual const char *getPassName() const { return "Alpha Assembly Printer"; } void printInstruction(const MachineInstr *MI); + void EmitInstruction(const MachineInstr *MI) { + printInstruction(MI); + OutStreamer.AddBlankLine(); + } static const char *getRegisterName(unsigned RegNo); void printOp(const MachineOperand &MO, bool IsCallOp = false); void printOperand(const MachineInstr *MI, int opNum); void printBaseOffsetPair(const MachineInstr *MI, int i, bool brackets=true); - bool runOnMachineFunction(MachineFunction &F); + virtual void EmitFunctionBodyStart(); + virtual void EmitFunctionBodyEnd(); void EmitStartOfAsmFile(Module &M); bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, @@ -93,7 +96,7 @@ void AlphaAsmPrinter::printOp(const MachineOperand &MO, bool IsCallOp) { return; case MachineOperand::MO_MachineBasicBlock: - O << *GetMBBSymbol(MO.getMBB()->getNumber()); + O << *MO.getMBB()->getSymbol(OutContext); return; case MachineOperand::MO_ConstantPoolIndex: @@ -120,73 +123,16 @@ void AlphaAsmPrinter::printOp(const MachineOperand &MO, bool IsCallOp) { } } -/// runOnMachineFunction - This uses the printMachineInstruction() -/// method to print assembly for each instruction. -/// -bool AlphaAsmPrinter::runOnMachineFunction(MachineFunction &MF) { - this->MF = &MF; - - SetupMachineFunction(MF); - O << "\n\n"; - - // Print out constants referenced by the function - EmitConstantPool(MF.getConstantPool()); - - // Print out jump tables referenced by the function - EmitJumpTableInfo(MF.getJumpTableInfo(), MF); - - // Print out labels for the function. - const Function *F = MF.getFunction(); - OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); - - EmitAlignment(MF.getAlignment(), F); - switch (F->getLinkage()) { - default: llvm_unreachable("Unknown linkage type!"); - case Function::InternalLinkage: // Symbols default to internal. 
- case Function::PrivateLinkage: - case Function::LinkerPrivateLinkage: - break; - case Function::ExternalLinkage: - O << "\t.globl " << *CurrentFnSym << '\n'; - break; - case Function::WeakAnyLinkage: - case Function::WeakODRLinkage: - case Function::LinkOnceAnyLinkage: - case Function::LinkOnceODRLinkage: - O << MAI->getWeakRefDirective() << *CurrentFnSym << '\n'; - break; - } - - printVisibility(CurrentFnSym, F->getVisibility()); - +/// EmitFunctionBodyStart - Targets can override this to emit stuff before +/// the first basic block in the function. +void AlphaAsmPrinter::EmitFunctionBodyStart() { O << "\t.ent " << *CurrentFnSym << "\n"; +} - O << *CurrentFnSym << ":\n"; - - // Print out code for the function. - for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); - I != E; ++I) { - if (I != MF.begin()) - EmitBasicBlockStart(I); - - for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); - II != E; ++II) { - // Print the assembly for the instruction. - ++EmittedInsts; - processDebugLoc(II, true); - printInstruction(II); - - if (VerboseAsm) - EmitComments(*II); - O << '\n'; - processDebugLoc(II, false); - } - } - +/// EmitFunctionBodyEnd - Targets can override this to emit stuff after +/// the last basic block in the function. +void AlphaAsmPrinter::EmitFunctionBodyEnd() { O << "\t.end " << *CurrentFnSym << "\n"; - - // We didn't modify anything. - return false; } void AlphaAsmPrinter::EmitStartOfAsmFile(Module &M) { diff --git a/lib/Target/Alpha/AsmPrinter/Makefile b/lib/Target/Alpha/AsmPrinter/Makefile index 3f64aac..3c64a3c 100644 --- a/lib/Target/Alpha/AsmPrinter/Makefile +++ b/lib/Target/Alpha/AsmPrinter/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMAlphaAsmPrinter -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' alpha target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/Alpha/Makefile b/lib/Target/Alpha/Makefile index 14cbc6c..54d53ab 100644 --- a/lib/Target/Alpha/Makefile +++ b/lib/Target/Alpha/Makefile @@ -10,7 +10,6 @@ LEVEL = ../../.. LIBRARYNAME = LLVMAlphaCodeGen TARGET = Alpha -CXXFLAGS = -fno-rtti # Make sure that tblgen is run, first thing. BUILT_SOURCES = AlphaGenRegisterInfo.h.inc AlphaGenRegisterNames.inc \ diff --git a/lib/Target/Alpha/TargetInfo/Makefile b/lib/Target/Alpha/TargetInfo/Makefile index 6f7b898..de01d7f 100644 --- a/lib/Target/Alpha/TargetInfo/Makefile +++ b/lib/Target/Alpha/TargetInfo/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMAlphaInfo -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
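Note on the Alpha asm-printer hunks above: the hand-rolled runOnMachineFunction() loop (linkage switch, basic-block walk, EmittedInsts statistic) is deleted because the shared AsmPrinter driver now performs that walk itself and calls back into the target. A minimal sketch of the resulting printer shape, built only from hooks that appear in the hunks above — the class name is illustrative, not LLVM API:

    #include "llvm/CodeGen/AsmPrinter.h"
    #include "llvm/MC/MCStreamer.h"
    #include "llvm/Support/FormattedStream.h"
    using namespace llvm;

    struct SketchAsmPrinter : public AsmPrinter {
      SketchAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM,
                       MCContext &Ctx, MCStreamer &Streamer, const MCAsmInfo *T)
        : AsmPrinter(O, TM, Ctx, Streamer, T) {}

      void printInstruction(const MachineInstr *MI); // TableGen-generated

      // Called once per MachineInstr by the base-class function walker.
      virtual void EmitInstruction(const MachineInstr *MI) {
        printInstruction(MI);
        OutStreamer.AddBlankLine();
      }
      // Bracket the body with target directives; labels, linkage, constant
      // pools and jump tables are now emitted by the base class itself.
      virtual void EmitFunctionBodyStart() { O << "\t.ent " << *CurrentFnSym << "\n"; }
      virtual void EmitFunctionBodyEnd()   { O << "\t.end " << *CurrentFnSym << "\n"; }
    };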
diff --git a/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp b/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp index 749f735..fe13e14 100644 --- a/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp +++ b/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp @@ -30,20 +30,18 @@ #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetRegistry.h" -#include "llvm/ADT/Statistic.h" #include "llvm/ADT/SmallString.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" using namespace llvm; -STATISTIC(EmittedInsts, "Number of machine instrs printed"); - namespace { class BlackfinAsmPrinter : public AsmPrinter { public: BlackfinAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, - const MCAsmInfo *MAI, bool V) - : AsmPrinter(O, TM, MAI, V) {} + MCContext &Ctx, MCStreamer &Streamer, + const MCAsmInfo *MAI) + : AsmPrinter(O, TM, Ctx, Streamer, MAI) {} virtual const char *getPassName() const { return "Blackfin Assembly Printer"; @@ -54,8 +52,10 @@ namespace { void printInstruction(const MachineInstr *MI); // autogenerated. static const char *getRegisterName(unsigned RegNo); - void emitLinkage(const MCSymbol *GVSym, GlobalValue::LinkageTypes l); - bool runOnMachineFunction(MachineFunction &F); + void EmitInstruction(const MachineInstr *MI) { + printInstruction(MI); + OutStreamer.AddBlankLine(); + } bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode); bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, @@ -69,76 +69,6 @@ extern "C" void LLVMInitializeBlackfinAsmPrinter() { RegisterAsmPrinter<BlackfinAsmPrinter> X(TheBlackfinTarget); } -void BlackfinAsmPrinter::emitLinkage(const MCSymbol *GVSym, - GlobalValue::LinkageTypes L) { - switch (L) { - default: llvm_unreachable("Unknown linkage type!"); - case GlobalValue::InternalLinkage: // Symbols default to internal. - case GlobalValue::PrivateLinkage: - case GlobalValue::LinkerPrivateLinkage: - break; - case GlobalValue::ExternalLinkage: - O << MAI->getGlobalDirective() << *GVSym << "\n"; - break; - case GlobalValue::LinkOnceAnyLinkage: - case GlobalValue::LinkOnceODRLinkage: - case GlobalValue::WeakAnyLinkage: - case GlobalValue::WeakODRLinkage: - O << MAI->getGlobalDirective() << *GVSym << "\n"; - O << MAI->getWeakDefDirective() << *GVSym << "\n"; - break; - } -} - -/// runOnMachineFunction - This uses the printInstruction() -/// method to print assembly for each instruction. -/// -bool BlackfinAsmPrinter::runOnMachineFunction(MachineFunction &MF) { - SetupMachineFunction(MF); - EmitConstantPool(MF.getConstantPool()); - EmitJumpTableInfo(MF.getJumpTableInfo(), MF); - - const Function *F = MF.getFunction(); - OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); - EmitAlignment(2, F); - emitLinkage(CurrentFnSym, F->getLinkage()); - printVisibility(CurrentFnSym, F->getVisibility()); - - O << "\t.type\t" << *CurrentFnSym << ", STT_FUNC\n"; - O << *CurrentFnSym << ":\n"; - - if (DW) - DW->BeginFunction(&MF); - - // Print out code for the function. - for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); - I != E; ++I) { - // Print a label for the basic block. - EmitBasicBlockStart(I); - - for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); - II != E; ++II) { - // Print the assembly for the instruction. 
- processDebugLoc(II, true); - - printInstruction(II); - if (VerboseAsm) - EmitComments(*II); - O << '\n'; - - processDebugLoc(II, false); - ++EmittedInsts; - } - } - - O << "\t.size " << *CurrentFnSym << ", .-" << *CurrentFnSym << "\n"; - - if (DW) - DW->EndFunction(&MF); - - return false; -} - void BlackfinAsmPrinter::printOperand(const MachineInstr *MI, int opNum) { const MachineOperand &MO = MI->getOperand (opNum); switch (MO.getType()) { @@ -152,7 +82,7 @@ void BlackfinAsmPrinter::printOperand(const MachineInstr *MI, int opNum) { O << MO.getImm(); break; case MachineOperand::MO_MachineBasicBlock: - O << *GetMBBSymbol(MO.getMBB()->getNumber()); + O << *MO.getMBB()->getSymbol(OutContext); return; case MachineOperand::MO_GlobalAddress: O << *GetGlobalValueSymbol(MO.getGlobal()); diff --git a/lib/Target/Blackfin/AsmPrinter/Makefile b/lib/Target/Blackfin/AsmPrinter/Makefile index 30e8285..091d4df 100644 --- a/lib/Target/Blackfin/AsmPrinter/Makefile +++ b/lib/Target/Blackfin/AsmPrinter/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMBlackfinAsmPrinter -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' Blackfin target directory to grab private # headers diff --git a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp index e1b6008..2c9cc60 100644 --- a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp +++ b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp @@ -175,7 +175,7 @@ void BlackfinDAGToDAGISel::FixRegisterClasses(SelectionDAG &DAG) { // We cannot copy CC <-> !(CC/D) if ((isCC(DefRC) && !isDCC(UseRC)) || (isCC(UseRC) && !isDCC(DefRC))) { SDNode *Copy = - DAG.getMachineNode(TargetInstrInfo::COPY_TO_REGCLASS, + DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS, NI->getDebugLoc(), MVT::i32, UI.getUse().get(), diff --git a/lib/Target/Blackfin/BlackfinISelLowering.cpp b/lib/Target/Blackfin/BlackfinISelLowering.cpp index ad2510a..5ce2013 100644 --- a/lib/Target/Blackfin/BlackfinISelLowering.cpp +++ b/lib/Target/Blackfin/BlackfinISelLowering.cpp @@ -22,7 +22,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/ADT/VectorExtras.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -206,7 +206,8 @@ BlackfinTargetLowering::LowerFormalArguments(SDValue Chain, int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true, false); SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); - InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0)); + InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0, + false, false, 0)); } } @@ -273,11 +274,13 @@ BlackfinTargetLowering::LowerReturn(SDValue Chain, SDValue BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, + bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) { + // Blackfin target does not yet support tail call optimization. + isTailCall = false; // Analyze operands of the call, assigning locations to each operand. 
SmallVector<CCValAssign, 16> ArgLocs; @@ -327,7 +330,7 @@ BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee, OffsetN = DAG.getNode(ISD::ADD, dl, MVT::i32, SPN, OffsetN); MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, OffsetN, PseudoSourceValue::getStack(), - Offset)); + Offset, false, false, 0)); } } diff --git a/lib/Target/Blackfin/BlackfinISelLowering.h b/lib/Target/Blackfin/BlackfinISelLowering.h index cdbc7d2..5f39910 100644 --- a/lib/Target/Blackfin/BlackfinISelLowering.h +++ b/lib/Target/Blackfin/BlackfinISelLowering.h @@ -64,7 +64,7 @@ namespace llvm { SmallVectorImpl<SDValue> &InVals); virtual SDValue LowerCall(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, bool isTailCall, + CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, diff --git a/lib/Target/Blackfin/BlackfinMCAsmInfo.cpp b/lib/Target/Blackfin/BlackfinMCAsmInfo.cpp index 6d0f66c..31470fb 100644 --- a/lib/Target/Blackfin/BlackfinMCAsmInfo.cpp +++ b/lib/Target/Blackfin/BlackfinMCAsmInfo.cpp @@ -18,4 +18,5 @@ using namespace llvm; BlackfinMCAsmInfo::BlackfinMCAsmInfo(const Target &T, const StringRef &TT) { GlobalPrefix = "_"; CommentString = "//"; + HasSetDirective = false; } diff --git a/lib/Target/Blackfin/Makefile b/lib/Target/Blackfin/Makefile index 4fdaf27..339bef9 100644 --- a/lib/Target/Blackfin/Makefile +++ b/lib/Target/Blackfin/Makefile @@ -10,7 +10,6 @@ LEVEL = ../../.. LIBRARYNAME = LLVMBlackfinCodeGen TARGET = Blackfin -CXXFLAGS = -fno-rtti # Make sure that tblgen is run, first thing. BUILT_SOURCES = BlackfinGenRegisterInfo.h.inc BlackfinGenRegisterNames.inc \ diff --git a/lib/Target/Blackfin/TargetInfo/Makefile b/lib/Target/Blackfin/TargetInfo/Makefile index 5c770cf..c49cfbe 100644 --- a/lib/Target/Blackfin/TargetInfo/Makefile +++ b/lib/Target/Blackfin/TargetInfo/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMBlackfinInfo -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
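Note: the same LowerCall change recurs in every target in this patch (Alpha and Blackfin above, CellSPU and MSP430 below): isTailCall is now taken by reference so a backend can veto tail-call optimization before any lowering happens. A sketch of the idiom with the body elided; "SomeTargetLowering" is a placeholder, not a real class:

    SDValue
    SomeTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                                  CallingConv::ID CallConv, bool isVarArg,
                                  bool &isTailCall,   // was: bool isTailCall
                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
                                  const SmallVectorImpl<ISD::InputArg> &Ins,
                                  DebugLoc dl, SelectionDAG &DAG,
                                  SmallVectorImpl<SDValue> &InVals) {
      // This target does not yet support tail call optimization, so clear
      // the flag up front; the caller sees the update through the reference.
      isTailCall = false;
      // ... argument analysis and call emission continue unchanged ...
    }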
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp index e765655..c1c1d80 100644 --- a/lib/Target/CBackend/CBackend.cpp +++ b/lib/Target/CBackend/CBackend.cpp @@ -493,7 +493,7 @@ raw_ostream & CWriter::printSimpleType(formatted_raw_ostream &Out, const Type *Ty, bool isSigned, const std::string &NameSoFar) { - assert((Ty->isPrimitiveType() || Ty->isInteger() || isa<VectorType>(Ty)) && + assert((Ty->isPrimitiveType() || Ty->isIntegerTy() || isa<VectorType>(Ty)) && "Invalid type for printSimpleType"); switch (Ty->getTypeID()) { case Type::VoidTyID: return Out << "void " << NameSoFar; @@ -540,7 +540,7 @@ CWriter::printSimpleType(formatted_raw_ostream &Out, const Type *Ty, std::ostream & CWriter::printSimpleType(std::ostream &Out, const Type *Ty, bool isSigned, const std::string &NameSoFar) { - assert((Ty->isPrimitiveType() || Ty->isInteger() || isa<VectorType>(Ty)) && + assert((Ty->isPrimitiveType() || Ty->isIntegerTy() || isa<VectorType>(Ty)) && "Invalid type for printSimpleType"); switch (Ty->getTypeID()) { case Type::VoidTyID: return Out << "void " << NameSoFar; @@ -591,7 +591,7 @@ raw_ostream &CWriter::printType(formatted_raw_ostream &Out, const Type *Ty, bool isSigned, const std::string &NameSoFar, bool IgnoreName, const AttrListPtr &PAL) { - if (Ty->isPrimitiveType() || Ty->isInteger() || isa<VectorType>(Ty)) { + if (Ty->isPrimitiveType() || Ty->isIntegerTy() || isa<VectorType>(Ty)) { printSimpleType(Out, Ty, isSigned, NameSoFar); return Out; } @@ -694,7 +694,7 @@ raw_ostream &CWriter::printType(formatted_raw_ostream &Out, std::ostream &CWriter::printType(std::ostream &Out, const Type *Ty, bool isSigned, const std::string &NameSoFar, bool IgnoreName, const AttrListPtr &PAL) { - if (Ty->isPrimitiveType() || Ty->isInteger() || isa<VectorType>(Ty)) { + if (Ty->isPrimitiveType() || Ty->isIntegerTy() || isa<VectorType>(Ty)) { printSimpleType(Out, Ty, isSigned, NameSoFar); return Out; } @@ -1396,7 +1396,7 @@ bool CWriter::printConstExprCast(const ConstantExpr* CE, bool Static) { } if (NeedsExplicitCast) { Out << "(("; - if (Ty->isInteger() && Ty != Type::getInt1Ty(Ty->getContext())) + if (Ty->isIntegerTy() && Ty != Type::getInt1Ty(Ty->getContext())) printSimpleType(Out, Ty, TypeIsSigned); else printType(Out, Ty); // not integer, sign doesn't matter @@ -1497,7 +1497,7 @@ void CWriter::writeInstComputationInline(Instruction &I) { // We can't currently support integer types other than 1, 8, 16, 32, 64. // Validate this. const Type *Ty = I.getType(); - if (Ty->isInteger() && (Ty!=Type::getInt1Ty(I.getContext()) && + if (Ty->isIntegerTy() && (Ty!=Type::getInt1Ty(I.getContext()) && Ty!=Type::getInt8Ty(I.getContext()) && Ty!=Type::getInt16Ty(I.getContext()) && Ty!=Type::getInt32Ty(I.getContext()) && @@ -1841,7 +1841,7 @@ static SpecialGlobalClass getGlobalVariableClass(const GlobalVariable *GV) { return GlobalDtors; } - // Otherwise, it it is other metadata, don't print it. This catches things + // Otherwise, if it is other metadata, don't print it. This catches things // like debug information. if (GV->getSection() == "llvm.metadata") return NotPrinted; @@ -2287,7 +2287,8 @@ void CWriter::printModuleTypes(const TypeSymbolTable &TST) { void CWriter::printContainedStructs(const Type *Ty, std::set<const Type*> &StructPrinted) { // Don't walk through pointers. - if (isa<PointerType>(Ty) || Ty->isPrimitiveType() || Ty->isInteger()) return; + if (isa<PointerType>(Ty) || Ty->isPrimitiveType() || Ty->isIntegerTy()) + return; // Print all contained types first. 
for (Type::subtype_iterator I = Ty->subtype_begin(), @@ -2423,8 +2424,8 @@ static inline bool isFPIntBitCast(const Instruction &I) { return false; const Type *SrcTy = I.getOperand(0)->getType(); const Type *DstTy = I.getType(); - return (SrcTy->isFloatingPoint() && DstTy->isInteger()) || - (DstTy->isFloatingPoint() && SrcTy->isInteger()); + return (SrcTy->isFloatingPointTy() && DstTy->isIntegerTy()) || + (DstTy->isFloatingPointTy() && SrcTy->isIntegerTy()); } void CWriter::printFunction(Function &F) { @@ -3113,7 +3114,7 @@ void CWriter::visitCallInst(CallInst &I) { } /// visitBuiltinCall - Handle the call to the specified builtin. Returns true -/// if the entire call is handled, return false it it wasn't handled, and +/// if the entire call is handled, return false if it wasn't handled, and /// optionally set 'WroteCallee' if the callee has already been printed out. bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID, bool &WroteCallee) { @@ -3706,7 +3707,7 @@ bool CTargetMachine::addPassesToEmitWholeFile(PassManager &PM, formatted_raw_ostream &o, CodeGenFileType FileType, CodeGenOpt::Level OptLevel) { - if (FileType != TargetMachine::AssemblyFile) return true; + if (FileType != TargetMachine::CGFT_AssemblyFile) return true; PM.add(createGCLoweringPass()); PM.add(createLowerInvokePass()); diff --git a/lib/Target/CBackend/Makefile b/lib/Target/CBackend/Makefile index f82d277..621948a 100644 --- a/lib/Target/CBackend/Makefile +++ b/lib/Target/CBackend/Makefile @@ -9,8 +9,6 @@ LEVEL = ../../.. LIBRARYNAME = LLVMCBackend -CXXFLAGS = -fno-rtti - DIRS = TargetInfo include $(LEVEL)/Makefile.common diff --git a/lib/Target/CBackend/TargetInfo/Makefile b/lib/Target/CBackend/TargetInfo/Makefile index 6407904..d4d5e15 100644 --- a/lib/Target/CBackend/TargetInfo/Makefile +++ b/lib/Target/CBackend/TargetInfo/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMCBackendInfo -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt index 10478b4..43ebdac 100644 --- a/lib/Target/CMakeLists.txt +++ b/lib/Target/CMakeLists.txt @@ -9,7 +9,6 @@ add_llvm_library(LLVMTarget TargetInstrInfo.cpp TargetIntrinsicInfo.cpp TargetLoweringObjectFile.cpp - TargetMachOWriterInfo.cpp TargetMachine.cpp TargetRegisterInfo.cpp TargetSubtarget.cpp diff --git a/lib/Target/CellSPU/AsmPrinter/Makefile b/lib/Target/CellSPU/AsmPrinter/Makefile index aa0db52..69639ef 100644 --- a/lib/Target/CellSPU/AsmPrinter/Makefile +++ b/lib/Target/CellSPU/AsmPrinter/Makefile @@ -9,7 +9,6 @@ LEVEL = ../../../.. 
LIBRARYNAME = LLVMCellSPUAsmPrinter -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' CellSPU target directory to grab # private headers diff --git a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp index 59d6ddd..2ca05c2 100644 --- a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp +++ b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp @@ -19,12 +19,8 @@ #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" -#include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/DwarfWriter.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstr.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCSymbol.h" @@ -33,25 +29,18 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetRegistry.h" -#include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Support/MathExtras.h" using namespace llvm; namespace { - STATISTIC(EmittedInsts, "Number of machine instrs printed"); - - const std::string bss_section(".bss"); - class SPUAsmPrinter : public AsmPrinter { public: explicit SPUAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, - const MCAsmInfo *T, bool V) : - AsmPrinter(O, TM, T, V) {} + MCContext &Ctx, MCStreamer &Streamer, + const MCAsmInfo *T) : + AsmPrinter(O, TM, Ctx, Streamer, T) {} virtual const char *getPassName() const { return "STI CBEA SPU Assembly Printer"; @@ -67,7 +56,10 @@ namespace { static const char *getRegisterName(unsigned RegNo); - void printMachineInstruction(const MachineInstr *MI); + void EmitInstruction(const MachineInstr *MI) { + printInstruction(MI); + OutStreamer.AddBlankLine(); + } void printOp(const MachineOperand &MO); /// printRegister - Print register according to target requirements. @@ -276,29 +268,6 @@ namespace { llvm_unreachable("Invalid/non-immediate rotate amount in printRotateNeg7Imm"); } } - - virtual bool runOnMachineFunction(MachineFunction &F) = 0; - }; - - /// LinuxAsmPrinter - SPU assembly printer, customized for Linux - class LinuxAsmPrinter : public SPUAsmPrinter { - public: - explicit LinuxAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, - const MCAsmInfo *T, bool V) - : SPUAsmPrinter(O, TM, T, V) {} - - virtual const char *getPassName() const { - return "STI CBEA SPU Assembly Printer"; - } - - bool runOnMachineFunction(MachineFunction &F); - - void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - AU.addRequired<MachineModuleInfo>(); - AU.addRequired<DwarfWriter>(); - SPUAsmPrinter::getAnalysisUsage(AU); - } }; } // end of anonymous namespace @@ -312,7 +281,7 @@ void SPUAsmPrinter::printOp(const MachineOperand &MO) { return; case MachineOperand::MO_MachineBasicBlock: - O << *GetMBBSymbol(MO.getMBB()->getNumber()); + O << *MO.getMBB()->getSymbol(OutContext); return; case MachineOperand::MO_JumpTableIndex: O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() @@ -386,88 +355,7 @@ bool SPUAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, return false; } -/// printMachineInstruction -- Print out a single PowerPC MI in Darwin syntax -/// to the current output stream. 
-/// -void SPUAsmPrinter::printMachineInstruction(const MachineInstr *MI) { - ++EmittedInsts; - processDebugLoc(MI, true); - printInstruction(MI); - if (VerboseAsm) - EmitComments(*MI); - processDebugLoc(MI, false); - O << '\n'; -} - -/// runOnMachineFunction - This uses the printMachineInstruction() -/// method to print assembly for each instruction. -/// -bool LinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF) { - this->MF = &MF; - - SetupMachineFunction(MF); - O << "\n\n"; - - // Print out constants referenced by the function - EmitConstantPool(MF.getConstantPool()); - - // Print out labels for the function. - const Function *F = MF.getFunction(); - - OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); - EmitAlignment(MF.getAlignment(), F); - - switch (F->getLinkage()) { - default: llvm_unreachable("Unknown linkage type!"); - case Function::PrivateLinkage: - case Function::LinkerPrivateLinkage: - case Function::InternalLinkage: // Symbols default to internal. - break; - case Function::ExternalLinkage: - O << "\t.global\t" << *CurrentFnSym << "\n" << "\t.type\t"; - O << *CurrentFnSym << ", @function\n"; - break; - case Function::WeakAnyLinkage: - case Function::WeakODRLinkage: - case Function::LinkOnceAnyLinkage: - case Function::LinkOnceODRLinkage: - O << "\t.global\t" << *CurrentFnSym << "\n"; - O << "\t.weak_definition\t" << *CurrentFnSym << "\n"; - break; - } - - O << *CurrentFnSym << ":\n"; - - // Emit pre-function debug information. - DW->BeginFunction(&MF); - - // Print out code for the function. - for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); - I != E; ++I) { - // Print a label for the basic block. - if (I != MF.begin()) { - EmitBasicBlockStart(I); - } - for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); - II != E; ++II) { - // Print the assembly for the instruction. - printMachineInstruction(II); - } - } - - O << "\t.size\t" << *CurrentFnSym << ",.-" << *CurrentFnSym << "\n"; - - // Print out jump tables referenced by the function. - EmitJumpTableInfo(MF.getJumpTableInfo(), MF); - - // Emit post-function debug information. - DW->EndFunction(&MF); - - // We didn't modify anything. - return false; -} - // Force static initialization. extern "C" void LLVMInitializeCellSPUAsmPrinter() { - RegisterAsmPrinter<LinuxAsmPrinter> X(TheCellSPUTarget); + RegisterAsmPrinter<SPUAsmPrinter> X(TheCellSPUTarget); } diff --git a/lib/Target/CellSPU/Makefile b/lib/Target/CellSPU/Makefile index 9f3ff74..cbdbd3c 100644 --- a/lib/Target/CellSPU/Makefile +++ b/lib/Target/CellSPU/Makefile @@ -10,8 +10,6 @@ LEVEL = ../../.. 
LIBRARYNAME = LLVMCellSPUCodeGen TARGET = SPU -CXXFLAGS = -fno-rtti - BUILT_SOURCES = SPUGenInstrNames.inc SPUGenRegisterNames.inc \ SPUGenAsmWriter.inc SPUGenCodeEmitter.inc \ SPUGenRegisterInfo.h.inc SPUGenRegisterInfo.inc \ diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp index 80693e1..1ed06e3 100644 --- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -314,7 +314,7 @@ namespace { return SelectCode(CurDAG->getLoad(vecVT, dl, CurDAG->getEntryNode(), CGPoolOffset, PseudoSourceValue::getConstantPool(), 0, - false, Alignment).getNode()); + false, false, Alignment).getNode()); } /// Select - Convert the specified operand from a target-independent to a diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index aa7f910..b21eb37 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -25,7 +25,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/VectorExtras.h" #include "llvm/Support/Debug.h" @@ -669,7 +669,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { // Re-emit as a v16i8 vector load result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr, LN->getSrcValue(), LN->getSrcValueOffset(), - LN->isVolatile(), 16); + LN->isVolatile(), LN->isNonTemporal(), 16); // Update the chain the_chain = result.getValue(1); @@ -820,7 +820,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { // Re-emit as a v16i8 vector load alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr, SN->getSrcValue(), SN->getSrcValueOffset(), - SN->isVolatile(), 16); + SN->isVolatile(), SN->isNonTemporal(), 16); // Update the chain the_chain = alignLoadVec.getValue(1); @@ -861,7 +861,8 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { result = DAG.getStore(the_chain, dl, result, basePtr, LN->getSrcValue(), LN->getSrcValueOffset(), - LN->isVolatile(), LN->getAlignment()); + LN->isVolatile(), LN->isNonTemporal(), + LN->getAlignment()); #if 0 && !defined(NDEBUG) if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { @@ -1086,7 +1087,7 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain, // or we're forced to do vararg int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true, false); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); - ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0); + ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0, false, false, 0); ArgOffset += StackSlotSize; } @@ -1108,7 +1109,8 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain, true, false); SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8); - SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0); + SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0, + false, false, 0); Chain = Store.getOperand(0); MemOps.push_back(Store); @@ -1140,11 +1142,13 @@ static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) { SDValue SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, + bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, 
SmallVectorImpl<SDValue> &InVals) { + // CellSPU target does not yet support tail call optimization. + isTailCall = false; const SPUSubtarget *ST = SPUTM.getSubtargetImpl(); unsigned NumOps = Outs.size(); @@ -1188,7 +1192,8 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (ArgRegIdx != NumArgRegs) { RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg)); } else { - MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0)); + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, + false, false, 0)); ArgOffset += StackSlotSize; } break; @@ -1197,7 +1202,8 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (ArgRegIdx != NumArgRegs) { RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg)); } else { - MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0)); + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, + false, false, 0)); ArgOffset += StackSlotSize; } break; @@ -1210,7 +1216,8 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (ArgRegIdx != NumArgRegs) { RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg)); } else { - MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0)); + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, + false, false, 0)); ArgOffset += StackSlotSize; } break; diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h index ab349bb..3c51177 100644 --- a/lib/Target/CellSPU/SPUISelLowering.h +++ b/lib/Target/CellSPU/SPUISelLowering.h @@ -158,7 +158,7 @@ namespace llvm { virtual SDValue LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, + bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, diff --git a/lib/Target/CellSPU/SPUMCAsmInfo.cpp b/lib/Target/CellSPU/SPUMCAsmInfo.cpp index 03cdb29..5ef3c6b 100644 --- a/lib/Target/CellSPU/SPUMCAsmInfo.cpp +++ b/lib/Target/CellSPU/SPUMCAsmInfo.cpp @@ -16,7 +16,6 @@ using namespace llvm; SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, const StringRef &TT) { ZeroDirective = "\t.space\t"; - SetDirective = "\t.set"; Data64bitsDirective = "\t.quad\t"; AlignmentIsInBytes = false; HasLCOMMDirective = true; @@ -31,7 +30,6 @@ SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, const StringRef &TT) { HasDotLocAndDotFile = true; SupportsDebugInformation = true; - NeedsSet = true; // Exception handling is not supported on CellSPU (think about it: you only // have 256K for code+data. Would you support exception handling?) diff --git a/lib/Target/CellSPU/TargetInfo/Makefile b/lib/Target/CellSPU/TargetInfo/Makefile index 30ca5cf..9cb6827 100644 --- a/lib/Target/CellSPU/TargetInfo/Makefile +++ b/lib/Target/CellSPU/TargetInfo/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMCellSPUInfo -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
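Note on the DAG.getLoad/DAG.getStore churn in the CellSPU hunks (and the Alpha, Blackfin and MSP430 ones): SelectionDAG's memory-node builders grew an isNonTemporal flag, so every call site now spells out (isVolatile, isNonTemporal, Alignment) explicitly. The two call shapes, with argument roles annotated — a usage sketch against the post-patch signatures shown in the hunks:

    // Load: Alignment of 0 means "use the natural alignment of the type".
    SDValue Val = DAG.getLoad(MVT::i64, dl, Chain, Ptr,
                              SrcValue, /*SVOffset=*/0,
                              /*isVolatile=*/false,
                              /*isNonTemporal=*/false,
                              /*Alignment=*/0);
    // Store: chained on the load's output chain (value #1).
    SDValue St = DAG.getStore(Val.getValue(1), dl, Val, Ptr,
                              SrcValue, /*SVOffset=*/0,
                              false, false, 0);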
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index 73272bc..e3f2e9f 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -307,8 +307,6 @@ namespace { Out << "GlobalValue::DLLExportLinkage"; break; case GlobalValue::ExternalWeakLinkage: Out << "GlobalValue::ExternalWeakLinkage"; break; - case GlobalValue::GhostLinkage: - Out << "GlobalValue::GhostLinkage"; break; case GlobalValue::CommonLinkage: Out << "GlobalValue::CommonLinkage"; break; } @@ -346,7 +344,7 @@ namespace { std::string CppWriter::getCppName(const Type* Ty) { // First, handle the primitive types .. easy - if (Ty->isPrimitiveType() || Ty->isInteger()) { + if (Ty->isPrimitiveType() || Ty->isIntegerTy()) { switch (Ty->getTypeID()) { case Type::VoidTyID: return "Type::getVoidTy(getGlobalContext())"; case Type::IntegerTyID: { @@ -472,6 +470,7 @@ namespace { HANDLE_ATTR(Nest); HANDLE_ATTR(ReadNone); HANDLE_ATTR(ReadOnly); + HANDLE_ATTR(InlineHint); HANDLE_ATTR(NoInline); HANDLE_ATTR(AlwaysInline); HANDLE_ATTR(OptimizeForSize); @@ -494,7 +493,7 @@ namespace { bool CppWriter::printTypeInternal(const Type* Ty) { // We don't print definitions for primitive types - if (Ty->isPrimitiveType() || Ty->isInteger()) + if (Ty->isPrimitiveType() || Ty->isIntegerTy()) return false; // If we already defined this type, we don't need to define it again. @@ -687,7 +686,7 @@ namespace { // For primitive types and types already defined, just add a name TypeMap::const_iterator TNI = TypeNames.find(TI->second); - if (TI->second->isInteger() || TI->second->isPrimitiveType() || + if (TI->second->isIntegerTy() || TI->second->isPrimitiveType() || TNI != TypeNames.end()) { Out << "mod->addTypeName(\""; printEscapedString(TI->first); @@ -2011,7 +2010,7 @@ bool CPPTargetMachine::addPassesToEmitWholeFile(PassManager &PM, formatted_raw_ostream &o, CodeGenFileType FileType, CodeGenOpt::Level OptLevel) { - if (FileType != TargetMachine::AssemblyFile) return true; + if (FileType != TargetMachine::CGFT_AssemblyFile) return true; PM.add(new CppWriter(o)); return false; } diff --git a/lib/Target/CppBackend/Makefile b/lib/Target/CppBackend/Makefile index 52f2aad..d75f4e8 100644 --- a/lib/Target/CppBackend/Makefile +++ b/lib/Target/CppBackend/Makefile @@ -9,8 +9,6 @@ LEVEL = ../../.. LIBRARYNAME = LLVMCppBackend -CXXFLAGS = -fno-rtti - DIRS = TargetInfo include $(LEVEL)/Makefile.common diff --git a/lib/Target/CppBackend/TargetInfo/Makefile b/lib/Target/CppBackend/TargetInfo/Makefile index 7e44aab..6e68283 100644 --- a/lib/Target/CppBackend/TargetInfo/Makefile +++ b/lib/Target/CppBackend/TargetInfo/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMCppBackendInfo -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
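Note: the isInteger()/isFloatingPoint() to isIntegerTy()/isFloatingPointTy() edits in the CBackend and CppBackend hunks (and the MSIL ones below) are pure renames of the Type predicates; behavior is unchanged. For illustration, the predicate pair used by isFPIntBitCast() above, pulled out as a standalone helper (hypothetical name):

    #include "llvm/Type.h"
    using namespace llvm;

    // True iff a bitcast between these types crosses the FP/integer divide.
    static bool crossesFPIntBoundary(const Type *SrcTy, const Type *DstTy) {
      return (SrcTy->isFloatingPointTy() && DstTy->isIntegerTy()) ||
             (DstTy->isFloatingPointTy() && SrcTy->isIntegerTy());
    }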
diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp index 1bc708e..3330d09 100644 --- a/lib/Target/MSIL/MSILWriter.cpp +++ b/lib/Target/MSIL/MSILWriter.cpp @@ -187,7 +187,7 @@ void MSILWriter::printModuleStartup() { break; case 1: Arg1 = F->arg_begin(); - if (Arg1->getType()->isInteger()) { + if (Arg1->getType()->isIntegerTy()) { Out << "\tldloc\targc\n"; Args = getTypeName(Arg1->getType()); BadSig = false; @@ -195,7 +195,7 @@ void MSILWriter::printModuleStartup() { break; case 2: Arg1 = Arg2 = F->arg_begin(); ++Arg2; - if (Arg1->getType()->isInteger() && + if (Arg1->getType()->isIntegerTy() && Arg2->getType()->getTypeID() == Type::PointerTyID) { Out << "\tldloc\targc\n\tldloc\targv\n"; Args = getTypeName(Arg1->getType())+","+getTypeName(Arg2->getType()); @@ -207,7 +207,7 @@ void MSILWriter::printModuleStartup() { } bool RetVoid = (F->getReturnType()->getTypeID() == Type::VoidTyID); - if (BadSig || (!F->getReturnType()->isInteger() && !RetVoid)) { + if (BadSig || (!F->getReturnType()->isIntegerTy() && !RetVoid)) { Out << "\tldc.i4.0\n"; } else { Out << "\tcall\t" << getTypeName(F->getReturnType()) << @@ -334,7 +334,7 @@ std::string MSILWriter::getPrimitiveTypeName(const Type* Ty, bool isSigned) { std::string MSILWriter::getTypeName(const Type* Ty, bool isSigned, bool isNested) { - if (Ty->isPrimitiveType() || Ty->isInteger()) + if (Ty->isPrimitiveType() || Ty->isIntegerTy()) return getPrimitiveTypeName(Ty,isSigned); // FIXME: "OpaqueType" support switch (Ty->getTypeID()) { @@ -1690,7 +1690,7 @@ bool MSILTarget::addPassesToEmitWholeFile(PassManager &PM, CodeGenFileType FileType, CodeGenOpt::Level OptLevel) { - if (FileType != TargetMachine::AssemblyFile) return true; + if (FileType != TargetMachine::CGFT_AssemblyFile) return true; MSILWriter* Writer = new MSILWriter(o); PM.add(createGCLoweringPass()); // FIXME: Handle switch through native IL instruction "switch" diff --git a/lib/Target/MSIL/Makefile b/lib/Target/MSIL/Makefile index 9fecba5..70eadb3 100644 --- a/lib/Target/MSIL/Makefile +++ b/lib/Target/MSIL/Makefile @@ -9,8 +9,6 @@ LEVEL = ../../.. LIBRARYNAME = LLVMMSIL -CXXFLAGS = -fno-rtti - DIRS = TargetInfo include $(LEVEL)/Makefile.common diff --git a/lib/Target/MSIL/TargetInfo/Makefile b/lib/Target/MSIL/TargetInfo/Makefile index 710f5a1..30b0950 100644 --- a/lib/Target/MSIL/TargetInfo/Makefile +++ b/lib/Target/MSIL/TargetInfo/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMMSILInfo -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
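Note: all three whole-file writers (CBackend, CppBackend, MSIL) pick up the same enum rename, AssemblyFile to CGFT_AssemblyFile. A sketch of the guard these backends share; the target and pass names are placeholders:

    bool SomeWriterTarget::addPassesToEmitWholeFile(PassManager &PM,
                                                    formatted_raw_ostream &o,
                                                    CodeGenFileType FileType,
                                                    CodeGenOpt::Level OptLevel) {
      // These writers only produce textual output; report failure (true)
      // for any other requested file type.
      if (FileType != TargetMachine::CGFT_AssemblyFile) return true;
      PM.add(new SomeWriterPass(o));  // placeholder pass
      return false;
    }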
diff --git a/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp b/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp index 6033197..def5fc6 100644 --- a/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp +++ b/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp @@ -35,24 +35,16 @@ #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetRegistry.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Support/ErrorHandling.h" using namespace llvm; -STATISTIC(EmittedInsts, "Number of machine instrs printed"); - -static cl::opt<bool> -EnableMCInst("enable-msp430-mcinst-printer", cl::Hidden, - cl::desc("enable experimental mcinst gunk in the msp430 backend")); - namespace { class MSP430AsmPrinter : public AsmPrinter { public: MSP430AsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, - const MCAsmInfo *MAI, bool V) - : AsmPrinter(O, TM, MAI, V) {} + MCContext &Ctx, MCStreamer &Streamer, + const MCAsmInfo *MAI) + : AsmPrinter(O, TM, Ctx, Streamer, MAI) {} virtual const char *getPassName() const { return "MSP430 Assembly Printer"; @@ -76,10 +68,7 @@ namespace { bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode); - void printInstructionThroughMCStreamer(const MachineInstr *MI); - - void emitFunctionHeader(const MachineFunction &MF); - bool runOnMachineFunction(MachineFunction &F); + void EmitInstruction(const MachineInstr *MI); void getAnalysisUsage(AnalysisUsage &AU) const { AsmPrinter::getAnalysisUsage(AU); @@ -89,81 +78,11 @@ namespace { } // end of anonymous namespace -void MSP430AsmPrinter::emitFunctionHeader(const MachineFunction &MF) { - const Function *F = MF.getFunction(); - - OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); - - unsigned FnAlign = MF.getAlignment(); - EmitAlignment(FnAlign, F); - - switch (F->getLinkage()) { - default: llvm_unreachable("Unknown linkage type!"); - case Function::InternalLinkage: // Symbols default to internal. - case Function::PrivateLinkage: - case Function::LinkerPrivateLinkage: - break; - case Function::ExternalLinkage: - O << "\t.globl\t" << *CurrentFnSym << '\n'; - break; - case Function::LinkOnceAnyLinkage: - case Function::LinkOnceODRLinkage: - case Function::WeakAnyLinkage: - case Function::WeakODRLinkage: - O << "\t.weak\t" << *CurrentFnSym << '\n'; - break; - } - - printVisibility(CurrentFnSym, F->getVisibility()); - - O << "\t.type\t" << *CurrentFnSym << ",@function\n"; - O << *CurrentFnSym << ":\n"; -} - -bool MSP430AsmPrinter::runOnMachineFunction(MachineFunction &MF) { - SetupMachineFunction(MF); - O << "\n\n"; - - // Print the 'header' of function - emitFunctionHeader(MF); - - // Print out code for the function. - for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); - I != E; ++I) { - // Print a label for the basic block. - EmitBasicBlockStart(I); - - for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); - II != E; ++II) - // Print the assembly for the instruction. 
- printMachineInstruction(II); - } - - if (MAI->hasDotTypeDotSizeDirective()) - O << "\t.size\t" << *CurrentFnSym << ", .-" << *CurrentFnSym << '\n'; - - // We didn't modify anything - return false; -} - -void MSP430AsmPrinter::printMachineInstruction(const MachineInstr *MI) { - ++EmittedInsts; - - processDebugLoc(MI, true); - - printInstructionThroughMCStreamer(MI); - - if (VerboseAsm) - EmitComments(*MI); - O << '\n'; - - processDebugLoc(MI, false); -} - void MSP430AsmPrinter::printOperand(const MachineInstr *MI, int OpNum, const char* Modifier) { const MachineOperand &MO = MI->getOperand(OpNum); switch (MO.getType()) { + default: assert(0 && "Not implemented yet!"); case MachineOperand::MO_Register: O << MSP430InstPrinter::getRegisterName(MO.getReg()); return; @@ -173,7 +92,7 @@ void MSP430AsmPrinter::printOperand(const MachineInstr *MI, int OpNum, O << MO.getImm(); return; case MachineOperand::MO_MachineBasicBlock: - O << *GetMBBSymbol(MO.getMBB()->getNumber()); + O << *MO.getMBB()->getSymbol(OutContext); return; case MachineOperand::MO_GlobalAddress: { bool isMemOp = Modifier && !strcmp(Modifier, "mem"); @@ -196,8 +115,6 @@ void MSP430AsmPrinter::printOperand(const MachineInstr *MI, int OpNum, O << MAI->getGlobalPrefix() << MO.getSymbolName(); return; } - default: - llvm_unreachable("Not implemented yet!"); } } @@ -226,30 +143,14 @@ void MSP430AsmPrinter::printSrcMemOperand(const MachineInstr *MI, int OpNum, } void MSP430AsmPrinter::printCCOperand(const MachineInstr *MI, int OpNum) { - unsigned CC = MI->getOperand(OpNum).getImm(); - - switch (CC) { - default: - llvm_unreachable("Unsupported CC code"); - break; - case MSP430CC::COND_E: - O << "eq"; - break; - case MSP430CC::COND_NE: - O << "ne"; - break; - case MSP430CC::COND_HS: - O << "hs"; - break; - case MSP430CC::COND_LO: - O << "lo"; - break; - case MSP430CC::COND_GE: - O << "ge"; - break; - case MSP430CC::COND_L: - O << 'l'; - break; + switch (MI->getOperand(OpNum).getImm()) { + default: assert(0 && "Unknown cond"); + case MSP430CC::COND_E: O << "eq"; break; + case MSP430CC::COND_NE: O << "ne"; break; + case MSP430CC::COND_HS: O << "hs"; break; + case MSP430CC::COND_LO: O << "lo"; break; + case MSP430CC::COND_GE: O << "ge"; break; + case MSP430CC::COND_L: O << 'l'; break; } } @@ -277,32 +178,12 @@ bool MSP430AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, } //===----------------------------------------------------------------------===// -void MSP430AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI){ - +void MSP430AsmPrinter::EmitInstruction(const MachineInstr *MI) { MSP430MCInstLower MCInstLowering(OutContext, *Mang, *this); - switch (MI->getOpcode()) { - case TargetInstrInfo::DBG_LABEL: - case TargetInstrInfo::EH_LABEL: - case TargetInstrInfo::GC_LABEL: - printLabel(MI); - return; - case TargetInstrInfo::KILL: - printKill(MI); - return; - case TargetInstrInfo::INLINEASM: - printInlineAsm(MI); - return; - case TargetInstrInfo::IMPLICIT_DEF: - printImplicitDef(MI); - return; - default: break; - } - MCInst TmpInst; MCInstLowering.Lower(MI, TmpInst); - - printMCInst(&TmpInst); + OutStreamer.EmitInstruction(TmpInst); } static MCInstPrinter *createMSP430MCInstPrinter(const Target &T, diff --git a/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp b/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp index a480307..f6565bd 100644 --- a/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp +++ b/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp @@ -25,7 +25,6 @@ using namespace llvm; // Include the 
auto-generated portion of the assembly writer. #define MachineInstr MCInst -#define NO_ASM_WRITER_BOILERPLATE #include "MSP430GenAsmWriter.inc" #undef MachineInstr diff --git a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp b/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp index e1f80b7..4eb7f3d 100644 --- a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp +++ b/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp @@ -116,7 +116,7 @@ void MSP430MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { break; case MachineOperand::MO_MachineBasicBlock: MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create( - Printer.GetMBBSymbol(MO.getMBB()->getNumber()), Ctx)); + MO.getMBB()->getSymbol(Printer.OutContext), Ctx)); break; case MachineOperand::MO_GlobalAddress: MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO)); diff --git a/lib/Target/MSP430/AsmPrinter/Makefile b/lib/Target/MSP430/AsmPrinter/Makefile index c8a44a1..4f340c6 100644 --- a/lib/Target/MSP430/AsmPrinter/Makefile +++ b/lib/Target/MSP430/AsmPrinter/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMMSP430AsmPrinter -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' MSP430 target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp index 4eec757..a8c5e0a 100644 --- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp +++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp @@ -133,8 +133,7 @@ namespace { bool MatchWrapper(SDValue N, MSP430ISelAddressMode &AM); bool MatchAddressBase(SDValue N, MSP430ISelAddressMode &AM); - bool IsLegalAndProfitableToFold(SDNode *N, SDNode *U, - SDNode *Root) const; + bool IsLegalToFold(SDValue N, SDNode *U, SDNode *Root) const; virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, @@ -336,8 +335,8 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, return false; } -bool MSP430DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, - SDNode *Root) const { +bool MSP430DAGToDAGISel::IsLegalToFold(SDValue N, SDNode *U, + SDNode *Root) const { if (OptLevel == CodeGenOpt::None) return false; /// RMW preprocessing creates the following code: @@ -364,11 +363,11 @@ bool MSP430DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, /// during preprocessing) to determine whether it's legal to introduce such /// "cycle" for a moment. 
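A note on the updated hook before the lookup below: IsLegalToFold now receives an SDValue rather than an SDNode*, i.e. a node together with a result number, so identity checks against a stored SDNode* must go through getNode(). A minimal sketch (hypothetical helper, not part of the patch):

// An SDValue pairs a node with a result index; any result of N
// satisfies this check, which is exactly what the RMW lookup wants.
static bool refersToNode(SDValue V, const SDNode *N) {
  return V.getNode() == N;
}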
DenseMap<SDNode*, SDNode*>::const_iterator I = RMWStores.find(Root); - if (I != RMWStores.end() && I->second == N) + if (I != RMWStores.end() && I->second == N.getNode()) return true; // Proceed to 'generic' cycle finder code - return SelectionDAGISel::IsLegalAndProfitableToFold(N, U, Root); + return SelectionDAGISel::IsLegalToFold(N, U, Root); } @@ -656,7 +655,7 @@ SDNode *MSP430DAGToDAGISel::SelectIndexedBinOp(SDNode *Op, unsigned Opc8, unsigned Opc16) { if (N1.getOpcode() == ISD::LOAD && N1.hasOneUse() && - IsLegalAndProfitableToFold(N1.getNode(), Op, Op)) { + IsLegalToFold(N1, Op, Op)) { LoadSDNode *LD = cast<LoadSDNode>(N1); if (!isValidIndexedLoad(LD)) return NULL; diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index b794911..7281b37 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -31,8 +31,8 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/ValueTypes.h" -#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -273,11 +273,13 @@ MSP430TargetLowering::LowerFormalArguments(SDValue Chain, SDValue MSP430TargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, + bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) { + // MSP430 target does not yet support tail call optimization. + isTailCall = false; switch (CallConv) { default: @@ -369,7 +371,8 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain, //from this parameter SDValue FIN = DAG.getFrameIndex(FI, MVT::i16); InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, - PseudoSourceValue::getFixedStack(FI), 0)); + PseudoSourceValue::getFixedStack(FI), 0, + false, false, 0)); } } @@ -498,7 +501,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, PseudoSourceValue::getStack(), - VA.getLocMemOffset())); + VA.getLocMemOffset(), false, false, 0)); } } @@ -891,13 +894,13 @@ SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), DAG.getNode(ISD::ADD, dl, getPointerTy(), FrameAddr, Offset), - NULL, 0); + NULL, 0, false, false, 0); } // Just load the return address. 
SDValue RetAddrFI = getReturnAddressFrameIndex(DAG); return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - RetAddrFI, NULL, 0); + RetAddrFI, NULL, 0, false, false, 0); } SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { @@ -909,7 +912,8 @@ SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, MSP430::FPW, VT); while (Depth--) - FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0); + FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0, + false, false, 0); return FrameAddr; } @@ -969,7 +973,7 @@ const char *MSP430TargetLowering::getTargetNodeName(unsigned Opcode) const { bool MSP430TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const { - if (!Ty1->isInteger() || !Ty2->isInteger()) + if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) return false; return (Ty1->getPrimitiveSizeInBits() > Ty2->getPrimitiveSizeInBits()); @@ -984,7 +988,7 @@ bool MSP430TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { bool MSP430TargetLowering::isZExtFree(const Type *Ty1, const Type *Ty2) const { // MSP430 implicitly zero-extends 8-bit results in 16-bit registers. - return 0 && Ty1->isInteger(8) && Ty2->isInteger(16); + return 0 && Ty1->isIntegerTy(8) && Ty2->isIntegerTy(16); } bool MSP430TargetLowering::isZExtFree(EVT VT1, EVT VT2) const { diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h index 6152a05..87a790b 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.h +++ b/lib/Target/MSP430/MSP430ISelLowering.h @@ -154,7 +154,7 @@ namespace llvm { SmallVectorImpl<SDValue> &InVals); virtual SDValue LowerCall(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, bool isTailCall, + CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp index 9dc69e0..6372482 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.cpp +++ b/lib/Target/MSP430/MSP430InstrInfo.cpp @@ -356,12 +356,12 @@ unsigned MSP430InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { switch (Desc.getOpcode()) { default: assert(0 && "Unknown instruction size!"); - case TargetInstrInfo::DBG_LABEL: - case TargetInstrInfo::EH_LABEL: - case TargetInstrInfo::IMPLICIT_DEF: - case TargetInstrInfo::KILL: + case TargetOpcode::DBG_LABEL: + case TargetOpcode::EH_LABEL: + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::KILL: return 0; - case TargetInstrInfo::INLINEASM: { + case TargetOpcode::INLINEASM: { const MachineFunction *MF = MI->getParent()->getParent(); const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo(); return TII.getInlineAsmLength(MI->getOperand(0).getSymbolName(), diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td index cd502cf..bb06f7b 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.td +++ b/lib/Target/MSP430/MSP430InstrInfo.td @@ -285,7 +285,7 @@ def MOV16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes, // up to 16 bits. 
def def8 : PatLeaf<(i8 GR8:$src), [{ return N->getOpcode() != ISD::TRUNCATE && - N->getOpcode() != TargetInstrInfo::EXTRACT_SUBREG && + N->getOpcode() != TargetOpcode::EXTRACT_SUBREG && N->getOpcode() != ISD::CopyFromReg; }]>; diff --git a/lib/Target/MSP430/MSP430MCAsmInfo.cpp b/lib/Target/MSP430/MSP430MCAsmInfo.cpp index 516eacb..cfb499d 100644 --- a/lib/Target/MSP430/MSP430MCAsmInfo.cpp +++ b/lib/Target/MSP430/MSP430MCAsmInfo.cpp @@ -17,7 +17,6 @@ using namespace llvm; MSP430MCAsmInfo::MSP430MCAsmInfo(const Target &T, const StringRef &TT) { PrivateGlobalPrefix = ".L"; WeakRefDirective ="\t.weak\t"; - SetDirective = "\t.set\t"; PCSymbol="."; CommentString = ";"; diff --git a/lib/Target/MSP430/Makefile b/lib/Target/MSP430/Makefile index 11195a4..b1f33d6 100644 --- a/lib/Target/MSP430/Makefile +++ b/lib/Target/MSP430/Makefile @@ -10,7 +10,6 @@ LEVEL = ../../.. LIBRARYNAME = LLVMMSP430CodeGen TARGET = MSP430 -CXXFLAGS = -fno-rtti # Make sure that tblgen is run, first thing. BUILT_SOURCES = MSP430GenRegisterInfo.h.inc MSP430GenRegisterNames.inc \ diff --git a/lib/Target/MSP430/TargetInfo/Makefile b/lib/Target/MSP430/TargetInfo/Makefile index d17fa7b..abb08f2 100644 --- a/lib/Target/MSP430/TargetInfo/Makefile +++ b/lib/Target/MSP430/TargetInfo/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMMSP430Info -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/Makefile b/lib/Target/Makefile index 281d58b..50a360f 100644 --- a/lib/Target/Makefile +++ b/lib/Target/Makefile @@ -10,7 +10,6 @@ LEVEL = ../.. LIBRARYNAME = LLVMTarget BUILD_ARCHIVE = 1 -CXXFLAGS = -fno-rtti # We include this early so we can access the value of TARGETS_TO_BUILD as the # value for PARALLEL_DIRS which must be set before Makefile.rules is included diff --git a/lib/Target/Mips/AsmPrinter/Makefile b/lib/Target/Mips/AsmPrinter/Makefile index aed801e..a2fecf4 100644 --- a/lib/Target/Mips/AsmPrinter/Makefile +++ b/lib/Target/Mips/AsmPrinter/Makefile @@ -9,7 +9,6 @@ LEVEL = ../../../.. 
LIBRARYNAME = LLVMMipsAsmPrinter -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' Mips target directory to grab # private headers diff --git a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp index 9af9bd8..b8641c3 100644 --- a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp @@ -37,7 +37,6 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegistry.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/Debug.h" #include "llvm/Support/CommandLine.h" @@ -46,15 +45,14 @@ #include <cctype> using namespace llvm; -STATISTIC(EmittedInsts, "Number of machine instrs printed"); - namespace { class MipsAsmPrinter : public AsmPrinter { const MipsSubtarget *Subtarget; public: explicit MipsAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, - const MCAsmInfo *T, bool V) - : AsmPrinter(O, TM, T, V) { + MCContext &Ctx, MCStreamer &Streamer, + const MCAsmInfo *T) + : AsmPrinter(O, TM, Ctx, Streamer, T) { Subtarget = &TM.getSubtarget<MipsSubtarget>(); } @@ -70,18 +68,22 @@ namespace { const char *Modifier = 0); void printFCCOperand(const MachineInstr *MI, int opNum, const char *Modifier = 0); - void printSavedRegsBitmask(MachineFunction &MF); + void printSavedRegsBitmask(); void printHex32(unsigned int Value); const char *emitCurrentABIString(); - void emitFunctionStart(MachineFunction &MF); - void emitFunctionEnd(MachineFunction &MF); - void emitFrameDirective(MachineFunction &MF); + void emitFrameDirective(); void printInstruction(const MachineInstr *MI); // autogenerated. + void EmitInstruction(const MachineInstr *MI) { + printInstruction(MI); + OutStreamer.AddBlankLine(); + } + virtual void EmitFunctionBodyStart(); + virtual void EmitFunctionBodyEnd(); static const char *getRegisterName(unsigned RegNo); - bool runOnMachineFunction(MachineFunction &F); + virtual void EmitFunctionEntryLabel(); void EmitStartOfAsmFile(Module &M); }; } // end of anonymous namespace @@ -125,18 +127,16 @@ namespace { // Create a bitmask with all callee saved registers for CPU or Floating Point // registers. For CPU registers consider RA, GP and FP for saving if necessary. -void MipsAsmPrinter:: -printSavedRegsBitmask(MachineFunction &MF) -{ +void MipsAsmPrinter::printSavedRegsBitmask() { const TargetRegisterInfo &RI = *TM.getRegisterInfo(); - MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); + const MipsFunctionInfo *MipsFI = MF->getInfo<MipsFunctionInfo>(); // CPU and FPU Saved Registers Bitmasks unsigned int CPUBitmask = 0; unsigned int FPUBitmask = 0; // Set the CPU and FPU Bitmasks - MachineFrameInfo *MFI = MF.getFrameInfo(); + const MachineFrameInfo *MFI = MF->getFrameInfo(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned RegNum = MipsRegisterInfo::getRegisterNumbering(CSI[i].getReg()); @@ -147,11 +147,11 @@ printSavedRegsBitmask(MachineFunction &MF) } // Return Address and Frame registers must also be set in CPUBitmask. 
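As a worked example of the bitmask built here (the saved set is hypothetical; register numbers are the usual MIPS O32 assignments, and the directive name is an assumption, since only printHex32 is visible in this hunk):

// A function saving $s0 and $ra would produce:
unsigned CPUBitmask = 0;
CPUBitmask |= 1u << 16;   // $s0 is register number 16
CPUBitmask |= 1u << 31;   // $ra is register number 31
// printHex32(CPUBitmask) then prints 0x80010000 after the mask directive.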
- if (RI.hasFP(MF)) + if (RI.hasFP(*MF)) CPUBitmask |= (1 << MipsRegisterInfo:: - getRegisterNumbering(RI.getFrameRegister(MF))); + getRegisterNumbering(RI.getFrameRegister(*MF))); - if (MF.getFrameInfo()->hasCalls()) + if (MFI->hasCalls()) CPUBitmask |= (1 << MipsRegisterInfo:: getRegisterNumbering(RI.getRARegister())); @@ -178,12 +178,12 @@ printHex32(unsigned int Value) //===----------------------------------------------------------------------===// /// Frame Directive -void MipsAsmPrinter::emitFrameDirective(MachineFunction &MF) { +void MipsAsmPrinter::emitFrameDirective() { const TargetRegisterInfo &RI = *TM.getRegisterInfo(); - unsigned stackReg = RI.getFrameRegister(MF); + unsigned stackReg = RI.getFrameRegister(*MF); unsigned returnReg = RI.getRARegister(); - unsigned stackSize = MF.getFrameInfo()->getStackSize(); + unsigned stackSize = MF->getFrameInfo()->getStackSize(); O << "\t.frame\t" << '$' << LowercaseString(getRegisterName(stackReg)) @@ -207,96 +207,30 @@ const char *MipsAsmPrinter::emitCurrentABIString() { return NULL; } -/// Emit the directives used by GAS on the start of functions -void MipsAsmPrinter::emitFunctionStart(MachineFunction &MF) { - // Print out the label for the function. - const Function *F = MF.getFunction(); - OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); - - // 2 bits aligned - EmitAlignment(MF.getAlignment(), F); - - O << "\t.globl\t" << *CurrentFnSym << '\n'; +void MipsAsmPrinter::EmitFunctionEntryLabel() { O << "\t.ent\t" << *CurrentFnSym << '\n'; + OutStreamer.EmitLabel(CurrentFnSym); +} - printVisibility(CurrentFnSym, F->getVisibility()); - - if ((MAI->hasDotTypeDotSizeDirective()) && Subtarget->isLinux()) - O << "\t.type\t" << *CurrentFnSym << ", @function\n"; - - O << *CurrentFnSym << ":\n"; - - emitFrameDirective(MF); - printSavedRegsBitmask(MF); - - O << '\n'; +/// EmitFunctionBodyStart - Targets can override this to emit stuff before +/// the first basic block in the function. +void MipsAsmPrinter::EmitFunctionBodyStart() { + emitFrameDirective(); + printSavedRegsBitmask(); } -/// Emit the directives used by GAS on the end of functions -void MipsAsmPrinter::emitFunctionEnd(MachineFunction &MF) { +/// EmitFunctionBodyEnd - Targets can override this to emit stuff after +/// the last basic block in the function. +void MipsAsmPrinter::EmitFunctionBodyEnd() { // There are instruction for this macros, but they must // always be at the function end, and we can't emit and // break with BB logic. O << "\t.set\tmacro\n"; O << "\t.set\treorder\n"; - + O << "\t.end\t" << *CurrentFnSym << '\n'; - if (MAI->hasDotTypeDotSizeDirective() && !Subtarget->isLinux()) - O << "\t.size\t" << *CurrentFnSym << ", .-" << *CurrentFnSym << '\n'; } -/// runOnMachineFunction - This uses the printMachineInstruction() -/// method to print assembly for each instruction. -bool MipsAsmPrinter::runOnMachineFunction(MachineFunction &MF) { - this->MF = &MF; - - SetupMachineFunction(MF); - - // Print out constants referenced by the function - EmitConstantPool(MF.getConstantPool()); - - // Print out jump tables referenced by the function - EmitJumpTableInfo(MF.getJumpTableInfo(), MF); - - O << "\n\n"; - - // Emit the function start directives - emitFunctionStart(MF); - - // Print out code for the function. - for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); - I != E; ++I) { - - // Print a label for the basic block. 
- if (I != MF.begin()) { - EmitBasicBlockStart(I); - } - - for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); - II != E; ++II) { - processDebugLoc(II, true); - - // Print the assembly for the instruction. - printInstruction(II); - - if (VerboseAsm) - EmitComments(*II); - O << '\n'; - - processDebugLoc(II, false); - ++EmittedInsts; - } - - // Each Basic Block is separated by a newline - O << '\n'; - } - - // Emit function end directives - emitFunctionEnd(MF); - - // We didn't modify anything. - return false; -} // Print out an operand for an inline asm expression. bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, @@ -343,7 +277,7 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum) { break; case MachineOperand::MO_MachineBasicBlock: - O << *GetMBBSymbol(MO.getMBB()->getNumber()); + O << *MO.getMBB()->getSymbol(OutContext); return; case MachineOperand::MO_GlobalAddress: diff --git a/lib/Target/Mips/Makefile b/lib/Target/Mips/Makefile index 4e4d874..2ed8d77 100644 --- a/lib/Target/Mips/Makefile +++ b/lib/Target/Mips/Makefile @@ -10,7 +10,6 @@ LEVEL = ../../.. LIBRARYNAME = LLVMMipsCodeGen TARGET = Mips -CXXFLAGS = -fno-rtti # Make sure that tblgen is run, first thing. BUILT_SOURCES = MipsGenRegisterInfo.h.inc MipsGenRegisterNames.inc \ diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index e3a45d2..f1d4a67 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -245,7 +245,7 @@ SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDNode *N) { // lwc $f1, X+4($3) SDNode *LD0 = CurDAG->getMachineNode(Mips::LWC1, dl, MVT::f32, MVT::Other, Offset0, Base, Chain); - SDValue Undef = SDValue(CurDAG->getMachineNode(TargetInstrInfo::IMPLICIT_DEF, + SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, NVT), 0); SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::SUBREG_FPEVEN, dl, MVT::f64, Undef, SDValue(LD0, 0)); @@ -426,7 +426,7 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { SDValue InFlag = SDValue(MulNode, 0); - if (MulOp == ISD::MUL) + if (Opcode == ISD::MUL) return CurDAG->getMachineNode(Mips::MFLO, dl, MVT::i32, InFlag); else return CurDAG->getMachineNode(Mips::MFHI, dl, MVT::i32, InFlag); @@ -464,8 +464,7 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, Mips::ZERO, MVT::i32); SDValue Undef = SDValue( - CurDAG->getMachineNode( - TargetInstrInfo::IMPLICIT_DEF, dl, MVT::f64), 0); + CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::f64), 0); SDNode *MTC = CurDAG->getMachineNode(Mips::MTC1, dl, MVT::f32, Zero); SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::SUBREG_FPEVEN, dl, MVT::f64, Undef, SDValue(MTC, 0)); diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index ced8b93..584b887 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -60,9 +60,6 @@ MipsTargetLowering(MipsTargetMachine &TM) // setcc operations results (slt, sgt, ...). 
setBooleanContents(ZeroOrOneBooleanContent); - // JumpTable targets must use GOT when using PIC_ - setUsesGlobalOffsetTable(true); - // Set up the register classes addRegisterClass(MVT::i32, Mips::CPURegsRegisterClass); addRegisterClass(MVT::f32, Mips::FGR32RegisterClass); @@ -100,6 +97,8 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::BRCOND, MVT::Other, Custom); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); + setOperationAction(ISD::VASTART, MVT::Other, Custom); + // We custom lower AND/OR to handle the case where the DAG contains 'ands/ors' // with operands coming from setcc fp comparisons. This is necessary since @@ -182,6 +181,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) case ISD::OR: return LowerANDOR(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG); + case ISD::VASTART: return LowerVASTART(Op, DAG); } return SDValue(); } @@ -510,7 +510,8 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) { SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32, 0, MipsII::MO_GOT); SDValue ResNode = DAG.getLoad(MVT::i32, dl, - DAG.getEntryNode(), GA, NULL, 0); + DAG.getEntryNode(), GA, NULL, 0, + false, false, 0); // On functions and global targets not internally linked, only // a load from got/GP is necessary for PIC to work. if (!GV->hasLocalLinkage() || isa<Function>(GV)) @@ -549,7 +550,8 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) SDValue Ops[] = { JTI }; HiPart = DAG.getNode(MipsISD::Hi, dl, DAG.getVTList(MVT::i32), Ops, 1); } else // Emit Load from Global Pointer - HiPart = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), JTI, NULL, 0); + HiPart = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), JTI, NULL, 0, + false, false, 0); SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, JTI); ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo); @@ -586,7 +588,7 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), N->getOffset(), MipsII::MO_GOT); SDValue Load = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), - CP, NULL, 0); + CP, NULL, 0, false, false, 0); SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CP); ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, Load, Lo); } @@ -594,6 +596,17 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) return ResNode; } +SDValue MipsTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { + DebugLoc dl = Op.getDebugLoc(); + SDValue FI = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); + + // vastart just stores the address of the VarArgsFrameIndex slot into the + // memory location argument.
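For orientation, a minimal source-level sketch (hypothetical, not from the tree) of the construct this lowers: va_start(ap, n) is what reaches LowerVASTART as an ISD::VASTART node, and the store in the code that follows writes the frame-index address into 'ap'.

#include <cstdarg>

// Hypothetical vararg function exercising this path.
int sum(int n, ...) {
  va_list ap;
  va_start(ap, n);          // becomes ISD::VASTART
  int s = 0;
  for (int i = 0; i < n; ++i)
    s += va_arg(ap, int);   // reads from the saved-argument area
  va_end(ap);
  return s;
}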
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); + return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1), SV, 0, + false, false, 0); +} + //===----------------------------------------------------------------------===// // Calling Convention Implementation //===----------------------------------------------------------------------===// @@ -679,21 +692,86 @@ static bool CC_MipsO32(unsigned ValNo, EVT ValVT, return false; // CC must always match } +static bool CC_MipsO32_VarArgs(unsigned ValNo, EVT ValVT, + EVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + + static const unsigned IntRegsSize=4; + + static const unsigned IntRegs[] = { + Mips::A0, Mips::A1, Mips::A2, Mips::A3 + }; + + // Promote i8 and i16 + if (LocVT == MVT::i8 || LocVT == MVT::i16) { + LocVT = MVT::i32; + if (ArgFlags.isSExt()) + LocInfo = CCValAssign::SExt; + else if (ArgFlags.isZExt()) + LocInfo = CCValAssign::ZExt; + else + LocInfo = CCValAssign::AExt; + } + + if (ValVT == MVT::i32 || ValVT == MVT::f32) { + if (unsigned Reg = State.AllocateReg(IntRegs, IntRegsSize)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, MVT::i32, LocInfo)); + return false; + } + unsigned Off = State.AllocateStack(4, 4); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Off, LocVT, LocInfo)); + return false; + } + + unsigned UnallocIntReg = State.getFirstUnallocated(IntRegs, IntRegsSize); + if (ValVT == MVT::f64) { + if (IntRegs[UnallocIntReg] == (unsigned (Mips::A1))) { + // A1 can't be used anymore, because 64-bit arguments + // must be aligned when copied back to the caller stack + State.AllocateReg(IntRegs, IntRegsSize); + UnallocIntReg++; + } + + if (IntRegs[UnallocIntReg] == (unsigned (Mips::A0)) || + IntRegs[UnallocIntReg] == (unsigned (Mips::A2))) { + unsigned Reg = State.AllocateReg(IntRegs, IntRegsSize); + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, MVT::i32, LocInfo)); + // Shadow the next register so it can be used + // later to get the other 32-bit part. + State.AllocateReg(IntRegs, IntRegsSize); + return false; + } + + // Register is shadowed to preserve alignment, and the + // argument goes to a stack location. + if (UnallocIntReg != IntRegsSize) + State.AllocateReg(IntRegs, IntRegsSize); + + unsigned Off = State.AllocateStack(8, 8); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Off, LocVT, LocInfo)); + return false; + } + + return true; // CC didn't match +} + //===----------------------------------------------------------------------===// // Call Calling Convention Implementation //===----------------------------------------------------------------------===// /// LowerCall - function arguments are copied from virtual regs to /// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted. -/// TODO: isVarArg, isTailCall. +/// TODO: isTailCall. SDValue MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, + bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) { + // MIPS target does not yet support tail call optimization.
+ isTailCall = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -709,7 +787,8 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (Subtarget->isABI_O32()) { int VTsize = EVT(MVT::i32).getSizeInBits()/8; MFI->CreateFixedObject(VTsize, (VTsize*3), true, false); - CCInfo.AnalyzeCallOperands(Outs, CC_MipsO32); + CCInfo.AnalyzeCallOperands(Outs, + isVarArg ? CC_MipsO32_VarArgs : CC_MipsO32); } else CCInfo.AnalyzeCallOperands(Outs, CC_Mips); @@ -783,7 +862,8 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // emit ISD::STORE which stores the // parameter value to a stack location - MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0)); + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, + false, false, 0)); } // Transform all store nodes into one single node because all store @@ -835,11 +915,6 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, Chain = DAG.getNode(MipsISD::JmpLink, dl, NodeTys, &Ops[0], Ops.size()); InFlag = Chain.getValue(1); - // Create the CALLSEQ_END node. - Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), - DAG.getIntPtrConstant(0, true), InFlag); - InFlag = Chain.getValue(1); - // Create a stack location to hold GP when PIC is used. This stack // location is used on function prologue to save GP and also after all // emitted CALLs to restore GP. @@ -862,13 +937,19 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Reload GP value. FI = MipsFI->getGPFI(); SDValue FIN = DAG.getFrameIndex(FI,getPointerTy()); - SDValue GPLoad = DAG.getLoad(MVT::i32, dl, Chain, FIN, NULL, 0); + SDValue GPLoad = DAG.getLoad(MVT::i32, dl, Chain, FIN, NULL, 0, + false, false, 0); Chain = GPLoad.getValue(1); Chain = DAG.getCopyToReg(Chain, dl, DAG.getRegister(Mips::GP, MVT::i32), GPLoad, SDValue(0,0)); InFlag = Chain.getValue(1); } + // Create the CALLSEQ_END node. + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), + DAG.getIntPtrConstant(0, true), InFlag); + InFlag = Chain.getValue(1); + // Handle result values, copying them out of physregs into vregs that we // return. return LowerCallResult(Chain, InFlag, CallConv, isVarArg, @@ -906,23 +987,28 @@ MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, // Formal Arguments Calling Convention Implementation //===----------------------------------------------------------------------===// -/// LowerFormalArguments - transform physical registers into -/// virtual registers and generate load operations for -/// arguments places on the stack. -/// TODO: isVarArg +/// LowerFormalArguments - transform physical registers into virtual registers +/// and generate load operations for arguments placed on the stack. SDValue MipsTargetLowering::LowerFormalArguments(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<ISD::InputArg> - &Ins, - DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> + &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); unsigned StackReg = MF.getTarget().getRegisterInfo()->getFrameRegister(MF); + VarArgsFrameIndex = 0; + + // Used with varargs to accumulate store chains.
+ std::vector<SDValue> OutChains; + + // Keep track of the last register used for arguments + unsigned ArgRegEnd = 0; // Assign locations to all of the incoming arguments. SmallVector<CCValAssign, 16> ArgLocs; @@ -930,7 +1016,8 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, ArgLocs, *DAG.getContext()); if (Subtarget->isABI_O32()) - CCInfo.AnalyzeFormalArguments(Ins, CC_MipsO32); + CCInfo.AnalyzeFormalArguments(Ins, + isVarArg ? CC_MipsO32_VarArgs : CC_MipsO32); else CCInfo.AnalyzeFormalArguments(Ins, CC_Mips); @@ -944,6 +1031,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, // Arguments stored on registers if (VA.isRegLoc()) { EVT RegVT = VA.getLocVT(); + ArgRegEnd = VA.getLocReg(); TargetRegisterClass *RC = 0; if (RegVT == MVT::i32) @@ -954,11 +1042,11 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, if (!Subtarget->isSingleFloat()) RC = Mips::AFGR64RegisterClass; } else - llvm_unreachable("RegVT not supported by LowerFormalArguments Lowering"); + llvm_unreachable("RegVT not supported by FormalArguments Lowering"); // Transform the arguments stored on // physical registers into virtual ones - unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC); + unsigned Reg = AddLiveIn(DAG.getMachineFunction(), ArgRegEnd, RC); SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); // If this is an 8 or 16-bit value, it has been passed promoted @@ -991,34 +1079,13 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, } InVals.push_back(ArgValue); - - // To meet ABI, when VARARGS are passed on registers, the registers - // must have their values written to the caller stack frame. - if ((isVarArg) && (Subtarget->isABI_O32())) { - if (StackPtr.getNode() == 0) - StackPtr = DAG.getRegister(StackReg, getPointerTy()); - - // The stack pointer offset is relative to the caller stack frame. - // Since the real stack size is unknown here, a negative SPOffset - // is used so there's a way to adjust these offsets when the stack - // size get known (on EliminateFrameIndex). A dummy SPOffset is - // used instead of a direct negative address (which is recorded to - // be used on emitPrologue) to avoid mis-calc of the first stack - // offset on PEI::calculateFrameObjectOffsets. - // Arguments are always 32-bit. - int FI = MFI->CreateFixedObject(4, 0, true, false); - MipsFI->recordStoreVarArgsFI(FI, -(4+(i*4))); - SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy()); - - // emit ISD::STORE whichs stores the - // parameter value to a stack Location - InVals.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, NULL, 0)); - } - } else { // VA.isRegLoc() // sanity check assert(VA.isMemLoc()); + + // The last argument is not a register anymore + ArgRegEnd = 0; // The stack pointer offset is relative to the caller stack frame. // Since the real stack size is unknown here, a negative SPOffset @@ -1035,7 +1102,8 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, // Create load nodes to retrieve arguments from the stack SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); - InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0)); + InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0, + false, false, 0)); } } @@ -1052,6 +1120,42 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain); } + // To meet ABI, when VARARGS are passed on registers, the registers + // must have their values written to the caller stack frame. 
If the last + // argument was placed on the stack, there's no need to save any register. + if ((isVarArg) && (Subtarget->isABI_O32() && ArgRegEnd)) { + if (StackPtr.getNode() == 0) + StackPtr = DAG.getRegister(StackReg, getPointerTy()); + + // The last register argument that must be saved is Mips::A3 + TargetRegisterClass *RC = Mips::CPURegsRegisterClass; + unsigned StackLoc = ArgLocs.size()-1; + + for (++ArgRegEnd; ArgRegEnd <= Mips::A3; ++ArgRegEnd, ++StackLoc) { + unsigned Reg = AddLiveIn(DAG.getMachineFunction(), ArgRegEnd, RC); + SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, MVT::i32); + + int FI = MFI->CreateFixedObject(4, 0, true, false); + MipsFI->recordStoreVarArgsFI(FI, -(4+(StackLoc*4))); + SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy()); + OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, NULL, 0, + false, false, 0)); + + // Record the frame index of the first variable argument, + // which is the value VASTART needs. + if (!VarArgsFrameIndex) + VarArgsFrameIndex = FI; + } + } + + // All stores are grouped in one node to allow the matching between + // the size of Ins and InVals. This only happens for vararg functions. + if (!OutChains.empty()) { + OutChains.push_back(Chain); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &OutChains[0], OutChains.size()); + } + return Chain; } diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index cacf4b5..7256617 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -68,6 +68,8 @@ namespace llvm { //===--------------------------------------------------------------------===// class MipsTargetLowering : public TargetLowering { + int VarArgsFrameIndex; // FrameIndex for start of varargs area.
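For context on how a calling-convention callback such as CC_MipsO32_VarArgs is consumed, a hedged sketch assembled from the calls visible in the hunks above; the per-location handling is elided to comments:

SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, /*isVarArg=*/true, getTargetMachine(),
               ArgLocs, *DAG.getContext());
CCInfo.AnalyzeCallOperands(Outs, CC_MipsO32_VarArgs);
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
  CCValAssign &VA = ArgLocs[i];
  if (VA.isRegLoc()) {
    // Argument travels in VA.getLocReg().
  } else {
    assert(VA.isMemLoc());
    // Argument lives at VA.getLocMemOffset() from the stack pointer.
  }
}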
+ public: explicit MipsTargetLowering(MipsTargetMachine &TM); @@ -107,6 +109,7 @@ namespace llvm { SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG); SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG); SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG); + SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG); virtual SDValue LowerFormalArguments(SDValue Chain, @@ -118,7 +121,7 @@ namespace llvm { virtual SDValue LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, + bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index ce89cfd..fa4518d 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -113,7 +113,6 @@ let ft = 0 in { defm ROUND_W : FFR1_1<0b001100, "round.w">; defm TRUNC_W : FFR1_1<0b001101, "trunc.w">; defm CVTW : FFR1_1<0b100100, "cvt.w">; - defm FMOV : FFR1_1<0b000110, "mov">; defm FABS : FFR1_2<0b000101, "abs", fabs>; defm FNEG : FFR1_2<0b000111, "neg", fneg>; @@ -173,6 +172,11 @@ let fd = 0 in { "mtc1 $rt, $fs", []>; } +def FMOV_S32 : FFR<0x11, 0b000110, 0x0, (outs FGR32:$fd), (ins FGR32:$fs), + "mov.s $fd, $fs", []>; +def FMOV_D32 : FFR<0x11, 0b000110, 0x1, (outs AFGR64:$fd), (ins AFGR64:$fs), + "mov.d $fd, $fs", []>; + /// Floating Point Memory Instructions let Predicates = [IsNotSingleFloat, IsNotMipsI] in { def LDC1 : FFI<0b110101, (outs AFGR64:$ft), (ins mem:$addr), diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index e67bcbf..f16a805 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -120,7 +120,7 @@ def immZExt5 : PatLeaf<(imm), [{ // Mips Address Mode! SDNode frameindex could possibily be a match // since load and store instructions from stack used it. -def addr : ComplexPattern<i32, 2, "SelectAddr", [frameindex], []>; +def addr : ComplexPattern<iPTR, 2, "SelectAddr", [frameindex], []>; //===----------------------------------------------------------------------===// // Instructions specific format diff --git a/lib/Target/Mips/MipsMCAsmInfo.cpp b/lib/Target/Mips/MipsMCAsmInfo.cpp index 60ef1c9..89e3e11 100644 --- a/lib/Target/Mips/MipsMCAsmInfo.cpp +++ b/lib/Target/Mips/MipsMCAsmInfo.cpp @@ -16,12 +16,12 @@ using namespace llvm; MipsMCAsmInfo::MipsMCAsmInfo(const Target &T, const StringRef &TT) { AlignmentIsInBytes = false; - COMMDirectiveTakesAlignment = true; Data16bitsDirective = "\t.half\t"; Data32bitsDirective = "\t.word\t"; Data64bitsDirective = 0; PrivateGlobalPrefix = "$"; CommentString = "#"; ZeroDirective = "\t.space\t"; - PICJumpTableDirective = "\t.gpword\t"; + GPRel32Directive = "\t.gpword\t"; + HasSetDirective = false; } diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index 80fd917..f923bed 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -223,6 +223,8 @@ void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment(); + unsigned RegSize = Subtarget.isGP32bit() ? 4 : 8; + bool HasGP = MipsFI->needGPSaveRestore(); // Min and Max CSI FrameIndex. 
int MinCSFI = -1, MaxCSFI = -1; @@ -248,6 +250,9 @@ void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const for (unsigned i = 0, e = CSI.size(); i != e; ++i) CalleeSavedAreaSize += MFI->getObjectAlignment(CSI[i].getFrameIdx()); + unsigned StackOffset = HasGP ? (MipsFI->getGPStackOffset()+RegSize) + : (Subtarget.isABI_O32() ? 16 : 0); + // Adjust local variables. They should come on the stack right // after the arguments. int LastOffsetFI = -1; @@ -256,7 +261,8 @@ void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const continue; if (MFI->isDeadObjectIndex(i)) continue; - unsigned Offset = MFI->getObjectOffset(i) - CalleeSavedAreaSize; + unsigned Offset = + StackOffset + MFI->getObjectOffset(i) - CalleeSavedAreaSize; if (LastOffsetFI == -1) LastOffsetFI = i; if (Offset > MFI->getObjectOffset(LastOffsetFI)) @@ -265,11 +271,8 @@ void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const } // Adjust CPU Callee Saved Registers Area. Registers RA and FP must - // be saved in this CPU Area there is the need. This whole Area must - // be aligned to the default Stack Alignment requirements. - unsigned StackOffset = 0; - unsigned RegSize = Subtarget.isGP32bit() ? 4 : 8; - + // be saved in this CPU Area. This whole area must be aligned to the + // default Stack Alignment requirements. if (LastOffsetFI >= 0) StackOffset = MFI->getObjectOffset(LastOffsetFI)+ MFI->getObjectSize(LastOffsetFI); @@ -283,21 +286,26 @@ void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const StackOffset += MFI->getObjectAlignment(CSI[i].getFrameIdx()); } - if (hasFP(MF)) { + // Stack locations for FP and RA. If only one of them is used, + // the space must be allocated for both, otherwise no space at all. + if (hasFP(MF) || MFI->hasCalls()) { + // FP stack location MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true), StackOffset); MipsFI->setFPStackOffset(StackOffset); TopCPUSavedRegOff = StackOffset; StackOffset += RegSize; - } - if (MFI->hasCalls()) { + // SP stack location MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true), StackOffset); MipsFI->setRAStackOffset(StackOffset); - TopCPUSavedRegOff = StackOffset; StackOffset += RegSize; + + if (MFI->hasCalls()) + TopCPUSavedRegOff += RegSize; } + StackOffset = ((StackOffset+StackAlign-1)/StackAlign*StackAlign); // Adjust FPU Callee Saved Registers Area. This Area must be diff --git a/lib/Target/Mips/MipsTargetObjectFile.h b/lib/Target/Mips/MipsTargetObjectFile.h index 32e0436..237b160 100644 --- a/lib/Target/Mips/MipsTargetObjectFile.h +++ b/lib/Target/Mips/MipsTargetObjectFile.h @@ -10,7 +10,7 @@ #ifndef LLVM_TARGET_MIPS_TARGETOBJECTFILE_H #define LLVM_TARGET_MIPS_TARGETOBJECTFILE_H -#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" namespace llvm { diff --git a/lib/Target/Mips/TargetInfo/Makefile b/lib/Target/Mips/TargetInfo/Makefile index f27d49e..32f4e16 100644 --- a/lib/Target/Mips/TargetInfo/Makefile +++ b/lib/Target/Mips/TargetInfo/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMMipsInfo -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
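One detail of the frame-layout rework worth restating: the final StackOffset is rounded up to the stack alignment with an integer-division idiom. A self-contained restatement (hypothetical helper name, not part of the patch):

// Round Offset up to the next multiple of Align (Align must be nonzero).
static unsigned RoundUpToAlignment(unsigned Offset, unsigned Align) {
  return (Offset + Align - 1) / Align * Align;
}
// e.g. RoundUpToAlignment(20, 8) == 24 and RoundUpToAlignment(24, 8) == 24.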
diff --git a/lib/Target/PIC16/AsmPrinter/Makefile b/lib/Target/PIC16/AsmPrinter/Makefile index 27c4045..f4db57e 100644 --- a/lib/Target/PIC16/AsmPrinter/Makefile +++ b/lib/Target/PIC16/AsmPrinter/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMPIC16AsmPrinter -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' pic16 target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp index 0463596..b015edd 100644 --- a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp +++ b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp @@ -35,21 +35,17 @@ using namespace llvm; #include "PIC16GenAsmWriter.inc" PIC16AsmPrinter::PIC16AsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, - const MCAsmInfo *T, bool V) -: AsmPrinter(O, TM, T, V), DbgInfo(O, T) { + MCContext &Ctx, MCStreamer &Streamer, + const MCAsmInfo *T) +: AsmPrinter(O, TM, Ctx, Streamer, T), DbgInfo(O, T) { PTLI = static_cast<PIC16TargetLowering*>(TM.getTargetLowering()); PMAI = static_cast<const PIC16MCAsmInfo*>(T); PTOF = (PIC16TargetObjectFile *)&PTLI->getObjFileLowering(); } -bool PIC16AsmPrinter::printMachineInstruction(const MachineInstr *MI) { - processDebugLoc(MI, true); +void PIC16AsmPrinter::EmitInstruction(const MachineInstr *MI) { printInstruction(MI); - if (VerboseAsm) - EmitComments(*MI); - O << '\n'; - processDebugLoc(MI, false); - return true; + OutStreamer.AddBlankLine(); } static int getFunctionColor(const Function *F) { @@ -96,8 +92,6 @@ void PIC16AsmPrinter::ColorAutoSection(const Function *F) { /// directive and file begin debug directive (if required) for the function. /// bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) { - this->MF = &MF; - // This calls the base class function required to be called at beginning // of runOnMachineFunction. SetupMachineFunction(MF); @@ -112,8 +106,9 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) { DbgInfo.BeginFunction(MF); // Now emit the instructions of function in its code section. - const MCSection *fCodeSection - = getObjFileLowering().SectionForCode(CurrentFnSym->getName()); + const MCSection *fCodeSection = + getObjFileLowering().SectionForCode(CurrentFnSym->getName(), + PAN::isISR(F->getSection())); // Start the Code Section. O << "\n"; @@ -149,7 +144,7 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) { } // Print the assembly for the instruction. 
- printMachineInstruction(II); + EmitInstruction(II); } } @@ -211,7 +206,7 @@ void PIC16AsmPrinter::printOperand(const MachineInstr *MI, int opNum) { break; } case MachineOperand::MO_MachineBasicBlock: - O << *GetMBBSymbol(MO.getMBB()->getNumber()); + O << *MO.getMBB()->getSymbol(OutContext); return; default: diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h index 74ab72c..77b6e63 100644 --- a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h +++ b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h @@ -31,7 +31,8 @@ namespace llvm { class VISIBILITY_HIDDEN PIC16AsmPrinter : public AsmPrinter { public: explicit PIC16AsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, - const MCAsmInfo *T, bool V); + MCContext &Ctx, MCStreamer &Streamer, + const MCAsmInfo *T); private: virtual const char *getPassName() const { return "PIC16 Assembly Printer"; @@ -47,7 +48,7 @@ namespace llvm { void printInstruction(const MachineInstr *MI); // definition autogenerated. static const char *getRegisterName(unsigned RegNo); - bool printMachineInstruction(const MachineInstr *MI); + void EmitInstruction(const MachineInstr *MI); void EmitFunctionDecls (Module &M); void EmitUndefinedVars (Module &M); void EmitDefinedVars (Module &M); diff --git a/lib/Target/PIC16/Makefile b/lib/Target/PIC16/Makefile index a1dbde5..9e784d1 100644 --- a/lib/Target/PIC16/Makefile +++ b/lib/Target/PIC16/Makefile @@ -10,7 +10,6 @@ LEVEL = ../../.. LIBRARYNAME = LLVMPIC16CodeGen TARGET = PIC16 -CXXFLAGS = -fno-rtti # Make sure that tblgen is run, first thing. BUILT_SOURCES = PIC16GenRegisterInfo.h.inc PIC16GenRegisterNames.inc \ diff --git a/lib/Target/PIC16/PIC16.h b/lib/Target/PIC16/PIC16.h index e46c9b2..8d067de 100644 --- a/lib/Target/PIC16/PIC16.h +++ b/lib/Target/PIC16/PIC16.h @@ -55,9 +55,10 @@ namespace PIC16CC { // External symbol names require memory to live till the program end. // So we have to allocate it and keep. + // FIXME: Don't leak the allocated strings. inline static const char *createESName (const std::string &name) { char *tmpName = new char[name.size() + 1]; - strcpy (tmpName, name.c_str()); + memcpy(tmpName, name.c_str(), name.size() + 1); return tmpName; } diff --git a/lib/Target/PIC16/PIC16ABINames.h b/lib/Target/PIC16/PIC16ABINames.h index e18ddf1..b0b9318 100644 --- a/lib/Target/PIC16/PIC16ABINames.h +++ b/lib/Target/PIC16/PIC16ABINames.h @@ -325,6 +325,19 @@ namespace llvm { return o.str(); } + + // Return true if the current function is an ISR + inline static bool isISR(const std::string SectName) { + if (SectName.find("interrupt") != std::string::npos) + return true; + + return false; + } + + // Return the address for ISR starts in rom. + inline static std::string getISRAddr(void) { + return "0x4"; + } }; // class PAN. } // end namespace llvm; diff --git a/lib/Target/PIC16/PIC16DebugInfo.cpp b/lib/Target/PIC16/PIC16DebugInfo.cpp index 8368a3c..c517b1b 100644 --- a/lib/Target/PIC16/PIC16DebugInfo.cpp +++ b/lib/Target/PIC16/PIC16DebugInfo.cpp @@ -68,7 +68,7 @@ void PIC16DbgInfo::PopulateDerivedTypeInfo (DIType Ty, unsigned short &TypeNo, TypeNo = TypeNo << PIC16Dbg::S_DERIVED; } - // We also need to encode the the information about the base type of + // We also need to encode the information about the base type of // pointer in TypeNo. 
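The recursion above packs a chain of type qualifiers into one integer; a hedged sketch of the scheme, with the shift amount taken from the code and the tag value purely hypothetical:

unsigned short TypeNo = 0;
TypeNo = TypeNo << PIC16Dbg::S_DERIVED; // make room for this level
TypeNo = TypeNo | DerivedTag;           // hypothetical tag, e.g. for a pointer
// The recursive PopulateDebugInfo call below then encodes the base type
// into the low bits freed by the shift.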
DIType BaseType = DIDerivedType(Ty.getNode()).getTypeDerivedFrom(); PopulateDebugInfo(BaseType, TypeNo, HasAux, Aux, TagName); diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp index 9f093e8..d2fc8db 100644 --- a/lib/Target/PIC16/PIC16ISelLowering.cpp +++ b/lib/Target/PIC16/PIC16ISelLowering.cpp @@ -622,12 +622,12 @@ SDValue PIC16TargetLowering::ExpandStore(SDNode *N, SelectionDAG &DAG) { ChainHi = Chain.getOperand(1); } SDValue Store1 = DAG.getStore(ChainLo, dl, SrcLo, Ptr, NULL, - 0 + StoreOffset); + 0 + StoreOffset, false, false, 0); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(4, Ptr.getValueType())); SDValue Store2 = DAG.getStore(ChainHi, dl, SrcHi, Ptr, NULL, - 1 + StoreOffset); + 1 + StoreOffset, false, false, 0); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); @@ -1355,11 +1355,13 @@ GetDataAddress(DebugLoc dl, SDValue Callee, SDValue &Chain, SDValue PIC16TargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, + bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) { + // PIC16 target does not yet support tail call optimization. + isTailCall = false; assert(Callee.getValueType() == MVT::i16 && "Don't know how to legalize this call node!!!"); @@ -1511,8 +1513,7 @@ bool PIC16TargetLowering::NeedToConvertToMemOp(SDValue Op, unsigned &MemOp, // Direct load operands are folded in binary operations. But before folding // verify if this folding is legal. Fold only if it is legal; otherwise // convert this direct load to a separate memory operation. - if(ISel->IsLegalAndProfitableToFold(Op.getOperand(0).getNode(), - Op.getNode(), Op.getNode())) + if(ISel->IsLegalToFold(Op.getOperand(0), Op.getNode(), Op.getNode())) return false; else MemOp = 0; diff --git a/lib/Target/PIC16/PIC16ISelLowering.h b/lib/Target/PIC16/PIC16ISelLowering.h index afdd4b4..de14520 100644 --- a/lib/Target/PIC16/PIC16ISelLowering.h +++ b/lib/Target/PIC16/PIC16ISelLowering.h @@ -143,7 +143,7 @@ namespace llvm { virtual SDValue LowerCall(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, bool isTailCall, + CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, diff --git a/lib/Target/PIC16/PIC16MCAsmInfo.cpp b/lib/Target/PIC16/PIC16MCAsmInfo.cpp index 827315e..b080542 100644 --- a/lib/Target/PIC16/PIC16MCAsmInfo.cpp +++ b/lib/Target/PIC16/PIC16MCAsmInfo.cpp @@ -37,7 +37,7 @@ PIC16MCAsmInfo::PIC16MCAsmInfo(const Target &T, const StringRef &TT) { RomData8bitsDirective = " dw "; RomData16bitsDirective = " rom_di "; RomData32bitsDirective = " rom_dl "; - + HasSetDirective = false; // Set it to false because we need to generate the c file name and not the bc file // name. diff --git a/lib/Target/PIC16/PIC16MemSelOpt.cpp b/lib/Target/PIC16/PIC16MemSelOpt.cpp index cc71b04..ab81ed1 100644 --- a/lib/Target/PIC16/PIC16MemSelOpt.cpp +++ b/lib/Target/PIC16/PIC16MemSelOpt.cpp @@ -59,6 +59,7 @@ namespace { const TargetInstrInfo *TII; // Machine instruction info.
MachineBasicBlock *MBB; // Current basic block std::string CurBank; + int PageChanged; }; char MemSelOpt::ID = 0; @@ -93,10 +94,56 @@ bool MemSelOpt::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { // Let us assume that no bank is selected when entering a basic block. // Ideally we should look at the predecessors for this information. CurBank=""; + PageChanged=0; - for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) { + MachineBasicBlock::iterator I; + for (I = BB.begin(); I != BB.end(); ++I) { Changed |= processInstruction(I); + + // If the page has changed, insert a pagesel before + // any instruction that needs one + if (PageChanged == 1) + { + // Restore the page if it was changed, before leaving the basic block, + // because it may be required by the goto terminator or the fall thru + // basic block. + // If the terminator is return, we don't need to restore since there + // is no goto or fall thru basic block. + if ((I->getOpcode() == PIC16::sublw_3) || //macro has goto + (I->getOpcode() == PIC16::sublw_6) || //macro has goto + (I->getOpcode() == PIC16::addlwc) || //macro has goto + (TII->get(I->getOpcode()).isBranch())) + { + DebugLoc dl = I->getDebugLoc(); + BuildMI(*MBB, I, dl, TII->get(PIC16::pagesel)).addExternalSymbol("$"); + Changed = true; + PageChanged = 0; + } + } } + + // The basic block is over, but if we did not find any goto yet, + // we haven't restored the page. + // Restore the page if it was changed, before leaving the basic block, + // because it may be required by the fall thru basic block. + // If the terminator is return, we don't need to restore since there + // is no fall thru basic block. + if (PageChanged == 1) { + // Save the end pointer before we move back to the last insn. + MachineBasicBlock::iterator J = I; + I--; + const TargetInstrDesc &TID = TII->get(I->getOpcode()); + if (! TID.isReturn()) + { + DebugLoc dl = I->getDebugLoc(); + BuildMI(*MBB, J, dl, + TII->get(PIC16::pagesel)).addExternalSymbol("$"); + Changed = true; + PageChanged = 0; + } + } + + return Changed; } @@ -112,42 +159,74 @@ bool MemSelOpt::processInstruction(MachineInstr *MI) { if (!(TID.isBranch() || TID.isCall() || TID.mayLoad() || TID.mayStore())) return false; + // The first thing we should do is record whether banksel/pagesel are + // changed in an unknown way. This can happen via any type of call. + // We do it here first, before scanning for MemOp / BBOp, as the indirect + // call insns do not have any operands, but they still may change bank/page. + if (TID.isCall()) { + // Record that we have changed the page, so that we can restore it + // before the basic block ends. + // We need to signal that a page and bank change happened even for + // indirect calls. + PageChanged = 1; + + // When a call is made, there may be banksels for variables in the callee. + // Hence the banksel in the caller needs to be reset. + CurBank = ""; + } + // Scan for the memory address operand. // FIXME: Should we use standard interfaces like memoperands_iterator, // hasMemOperand() etc ? int MemOpPos = -1; + int BBOpPos = -1; for (unsigned i = 0; i < NumOperands; i++) { MachineOperand Op = MI->getOperand(i); if (Op.getType() == MachineOperand::MO_GlobalAddress || - Op.getType() == MachineOperand::MO_ExternalSymbol || - Op.getType() == MachineOperand::MO_MachineBasicBlock) { + Op.getType() == MachineOperand::MO_ExternalSymbol) { // We found one mem operand. Next one may be BS.
MemOpPos = i; - break; + } + if (Op.getType() == MachineOperand::MO_MachineBasicBlock) { + // We found one BB operand. Next one may be pagesel. + BBOpPos = i; } } // If we did not find an insn accessing memory, continue. - if (MemOpPos == -1) return Changed; + if ((MemOpPos == -1) && + (BBOpPos == -1)) + return false; + assert ((BBOpPos != MemOpPos) && "operand can only be of one type"); - // Get the MemOp. - MachineOperand &Op = MI->getOperand(MemOpPos); // If this is pagesel material, handle it first. - if (MI->getOpcode() == PIC16::CALL || - MI->getOpcode() == PIC16::br_uncond) { + // CALL and br_uncond insns use MemOp (GA or ES) and not BBOp. + // Pagesel is required only for a direct call. + if ((MI->getOpcode() == PIC16::CALL)) { + // Get the MemOp. + MachineOperand &MemOp = MI->getOperand(MemOpPos); DebugLoc dl = MI->getDebugLoc(); - BuildMI(*MBB, MI, dl, TII->get(PIC16::pagesel)). - addOperand(Op); - return true; + BuildMI(*MBB, MI, dl, TII->get(PIC16::pagesel)).addOperand(MemOp); + + // CALL and br_uncond need only pagesel, so we are done. + return true; } + // Pagesel is handled. Now, add a Banksel if needed. + if (MemOpPos == -1) return Changed; + // Get the MemOp. + MachineOperand &Op = MI->getOperand(MemOpPos); + // Get the section name (NewBank) for MemOp. // This assumes that the section names for globals are already set by // AsmPrinter->doInitialization. std::string NewBank = CurBank; + bool hasExternalLinkage = false; if (Op.getType() == MachineOperand::MO_GlobalAddress && Op.getGlobal()->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE) { + if (Op.getGlobal()->hasExternalLinkage()) + hasExternalLinkage = true; NewBank = Op.getGlobal()->getSection(); } else if (Op.getType() == MachineOperand::MO_ExternalSymbol) { // External Symbol is generated for temp data and arguments. They are @@ -162,7 +241,7 @@ bool MemSelOpt::processInstruction(MachineInstr *MI) { // If the previous and new section names are the same, we don't need to // emit banksel. - if (NewBank.compare(CurBank) != 0 ) { + if (NewBank.compare(CurBank) != 0 || hasExternalLinkage) { DebugLoc dl = MI->getDebugLoc(); BuildMI(*MBB, MI, dl, TII->get(PIC16::banksel)). addOperand(Op); diff --git a/lib/Target/PIC16/PIC16Passes/Makefile b/lib/Target/PIC16/PIC16Passes/Makefile index fb45d71..9684b8d 100644 --- a/lib/Target/PIC16/PIC16Passes/Makefile +++ b/lib/Target/PIC16/PIC16Passes/Makefile @@ -10,7 +10,6 @@ LEVEL = ../../../.. TARGET = PIC16 LIBRARYNAME = LLVMpic16passes BUILD_ARCHIVE = 1 -CXXFLAGS = -fno-rtti include $(LEVEL)/Makefile.common diff --git a/lib/Target/PIC16/PIC16TargetObjectFile.cpp b/lib/Target/PIC16/PIC16TargetObjectFile.cpp index d7cfe02..b891c18 100644 --- a/lib/Target/PIC16/PIC16TargetObjectFile.cpp +++ b/lib/Target/PIC16/PIC16TargetObjectFile.cpp @@ -315,8 +315,12 @@ PIC16TargetObjectFile::allocateSHARED(const GlobalVariable *GV, // Interface used by AsmPrinter to get a code section for a function. const PIC16Section * -PIC16TargetObjectFile::SectionForCode(const std::string &FnName) const { +PIC16TargetObjectFile::SectionForCode(const std::string &FnName, + bool isISR) const { const std::string &sec_name = PAN::getCodeSectionName(FnName); + // If it is an ISR, its code section starts at a specific address.
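The PAN helpers driving this choice were added earlier in the patch; their observable behavior, per those definitions (the sample section strings are hypothetical):

bool ISR = PAN::isISR("foo.interrupt.sec"); // true: contains "interrupt"
bool Plain = PAN::isISR("foo.code.sec");    // false
std::string Addr = PAN::getISRAddr();       // "0x4", where the ISR starts in rom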
+ if (isISR) + return getPIC16Section(sec_name, CODE, PAN::getISRAddr()); return getPIC16Section(sec_name, CODE); } diff --git a/lib/Target/PIC16/PIC16TargetObjectFile.h b/lib/Target/PIC16/PIC16TargetObjectFile.h index 0b0ad43..cf8bf84 100644 --- a/lib/Target/PIC16/PIC16TargetObjectFile.h +++ b/lib/Target/PIC16/PIC16TargetObjectFile.h @@ -137,7 +137,8 @@ namespace llvm { /// Return a code section for a function. - const PIC16Section *SectionForCode (const std::string &FnName) const; + const PIC16Section *SectionForCode (const std::string &FnName, + bool isISR) const; /// Return a frame section for a function. const PIC16Section *SectionForFrame (const std::string &FnName) const; diff --git a/lib/Target/PIC16/TargetInfo/Makefile b/lib/Target/PIC16/TargetInfo/Makefile index 9004be8..76609f6 100644 --- a/lib/Target/PIC16/TargetInfo/Makefile +++ b/lib/Target/PIC16/TargetInfo/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMPIC16Info -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/PowerPC/AsmPrinter/Makefile b/lib/Target/PowerPC/AsmPrinter/Makefile index 4378151..269ef92 100644 --- a/lib/Target/PowerPC/AsmPrinter/Makefile +++ b/lib/Target/PowerPC/AsmPrinter/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMPowerPCAsmPrinter -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' PowerPC target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp index b89c2b4..ac901d0 100644 --- a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp @@ -31,13 +31,13 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOptions.h" @@ -47,14 +47,11 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSet.h" #include "llvm/ADT/SmallString.h" using namespace llvm; -STATISTIC(EmittedInsts, "Number of machine instrs printed"); - namespace { class PPCAsmPrinter : public AsmPrinter { protected: @@ -63,8 +60,9 @@ namespace { uint64_t LabelID; public: explicit PPCAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, - const MCAsmInfo *T, bool V) - : AsmPrinter(O, TM, T, V), + MCContext &Ctx, MCStreamer &Streamer, + const MCAsmInfo *T) + : AsmPrinter(O, TM, Ctx, Streamer, T), Subtarget(TM.getSubtarget<PPCSubtarget>()), LabelID(0) {} virtual const char *getPassName() const { @@ -98,7 +96,7 @@ namespace { static const char *getRegisterName(unsigned RegNo); - void printMachineInstruction(const MachineInstr *MI); + virtual void EmitInstruction(const MachineInstr *MI); void 
printOp(const MachineOperand &MO); /// stripRegisterPrefix - This method strips the character prefix from a @@ -200,7 +198,7 @@ namespace { if (GV->isDeclaration() || GV->isWeakForLinker()) { // Dynamically-resolved functions need a stub for the function. MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$stub"); - const MCSymbol *&StubSym = + MCSymbol *&StubSym = MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym); if (StubSym == 0) StubSym = GetGlobalValueSymbol(GV); @@ -213,8 +211,8 @@ namespace { TempNameStr += StringRef(MO.getSymbolName()); TempNameStr += StringRef("$stub"); - const MCSymbol *Sym = GetExternalSymbolSymbol(TempNameStr.str()); - const MCSymbol *&StubSym = + MCSymbol *Sym = GetExternalSymbolSymbol(TempNameStr.str()); + MCSymbol *&StubSym = MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym); if (StubSym == 0) StubSym = GetExternalSymbolSymbol(MO.getSymbolName()); @@ -319,24 +317,24 @@ namespace { void printPredicateOperand(const MachineInstr *MI, unsigned OpNo, const char *Modifier); - - virtual bool runOnMachineFunction(MachineFunction &F) = 0; }; /// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux class PPCLinuxAsmPrinter : public PPCAsmPrinter { public: explicit PPCLinuxAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, - const MCAsmInfo *T, bool V) - : PPCAsmPrinter(O, TM, T, V){} + MCContext &Ctx, MCStreamer &Streamer, + const MCAsmInfo *T) + : PPCAsmPrinter(O, TM, Ctx, Streamer, T) {} virtual const char *getPassName() const { return "Linux PPC Assembly Printer"; } - bool runOnMachineFunction(MachineFunction &F); bool doFinalization(Module &M); + virtual void EmitFunctionEntryLabel(); + void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired<MachineModuleInfo>(); @@ -351,14 +349,14 @@ namespace { formatted_raw_ostream &OS; public: explicit PPCDarwinAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, - const MCAsmInfo *T, bool V) - : PPCAsmPrinter(O, TM, T, V), OS(O) {} + MCContext &Ctx, MCStreamer &Streamer, + const MCAsmInfo *T) + : PPCAsmPrinter(O, TM, Ctx, Streamer, T), OS(O) {} virtual const char *getPassName() const { return "Darwin PPC Assembly Printer"; } - bool runOnMachineFunction(MachineFunction &F); bool doFinalization(Module &M); void EmitStartOfAsmFile(Module &M); @@ -382,7 +380,7 @@ void PPCAsmPrinter::printOp(const MachineOperand &MO) { llvm_unreachable("printOp() does not handle immediate values"); case MachineOperand::MO_MachineBasicBlock: - O << *GetMBBSymbol(MO.getMBB()->getNumber()); + O << *MO.getMBB()->getSymbol(OutContext); return; case MachineOperand::MO_JumpTableIndex: O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() @@ -403,10 +401,10 @@ void PPCAsmPrinter::printOp(const MachineOperand &MO) { return; } - const MCSymbol *NLPSym = + MCSymbol *NLPSym = OutContext.GetOrCreateSymbol(StringRef(MAI->getGlobalPrefix())+ MO.getSymbolName()+"$non_lazy_ptr"); - const MCSymbol *&StubSym = + MCSymbol *&StubSym = MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(NLPSym); if (StubSym == 0) StubSym = GetExternalSymbolSymbol(MO.getSymbolName()); @@ -424,7 +422,7 @@ void PPCAsmPrinter::printOp(const MachineOperand &MO) { (GV->isDeclaration() || GV->isWeakForLinker())) { if (!GV->hasHiddenVisibility()) { SymToPrint = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); - const MCSymbol *&StubSym = + MCSymbol *&StubSym = MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(SymToPrint); if (StubSym == 0) StubSym = GetGlobalValueSymbol(GV); 
@@ -432,7 +430,7 @@ void PPCAsmPrinter::printOp(const MachineOperand &MO) { GV->hasAvailableExternallyLinkage()) { SymToPrint = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); - const MCSymbol *&StubSym = + MCSymbol *&StubSym = MMI->getObjFileInfo<MachineModuleInfoMachO>(). getHiddenGVStubEntry(SymToPrint); if (StubSym == 0) @@ -535,20 +533,16 @@ void PPCAsmPrinter::printPredicateOperand(const MachineInstr *MI, unsigned OpNo, } -/// printMachineInstruction -- Print out a single PowerPC MI in Darwin syntax to +/// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to /// the current output stream. /// -void PPCAsmPrinter::printMachineInstruction(const MachineInstr *MI) { - ++EmittedInsts; - - processDebugLoc(MI, true); - +void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { // Check for slwi/srwi mnemonics. - bool useSubstituteMnemonic = false; if (MI->getOpcode() == PPC::RLWINM) { unsigned char SH = MI->getOperand(2).getImm(); unsigned char MB = MI->getOperand(3).getImm(); unsigned char ME = MI->getOperand(4).getImm(); + bool useSubstituteMnemonic = false; if (SH <= 31 && MB == 0 && ME == (31-SH)) { O << "\tslwi "; useSubstituteMnemonic = true; } @@ -561,122 +555,55 @@ void PPCAsmPrinter::printMachineInstruction(const MachineInstr *MI) { O << ", "; printOperand(MI, 1); O << ", " << (unsigned int)SH; + OutStreamer.AddBlankLine(); + return; } - } else if (MI->getOpcode() == PPC::OR || MI->getOpcode() == PPC::OR8) { - if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) { - useSubstituteMnemonic = true; - O << "\tmr "; - printOperand(MI, 0); - O << ", "; - printOperand(MI, 1); - } - } else if (MI->getOpcode() == PPC::RLDICR) { + } + + if ((MI->getOpcode() == PPC::OR || MI->getOpcode() == PPC::OR8) && + MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) { + O << "\tmr "; + printOperand(MI, 0); + O << ", "; + printOperand(MI, 1); + OutStreamer.AddBlankLine(); + return; + } + + if (MI->getOpcode() == PPC::RLDICR) { unsigned char SH = MI->getOperand(2).getImm(); unsigned char ME = MI->getOperand(3).getImm(); // rldicr RA, RS, SH, 63-SH == sldi RA, RS, SH if (63-SH == ME) { - useSubstituteMnemonic = true; O << "\tsldi "; printOperand(MI, 0); O << ", "; printOperand(MI, 1); O << ", " << (unsigned int)SH; + OutStreamer.AddBlankLine(); + return; } } - if (!useSubstituteMnemonic) - printInstruction(MI); - - if (VerboseAsm) - EmitComments(*MI); - O << '\n'; - - processDebugLoc(MI, false); + printInstruction(MI); + OutStreamer.AddBlankLine(); } -/// runOnMachineFunction - This uses the printMachineInstruction() -/// method to print assembly for each instruction. -/// -bool PPCLinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF) { - this->MF = &MF; - - SetupMachineFunction(MF); - O << "\n\n"; - - // Print out constants referenced by the function - EmitConstantPool(MF.getConstantPool()); - - // Print out labels for the function. - const Function *F = MF.getFunction(); - OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); - - switch (F->getLinkage()) { - default: llvm_unreachable("Unknown linkage type!"); - case Function::PrivateLinkage: - case Function::InternalLinkage: // Symbols default to internal. 
- break; - case Function::ExternalLinkage: - O << "\t.global\t" << *CurrentFnSym << '\n' << "\t.type\t"; - O << *CurrentFnSym << ", @function\n"; - break; - case Function::LinkerPrivateLinkage: - case Function::WeakAnyLinkage: - case Function::WeakODRLinkage: - case Function::LinkOnceAnyLinkage: - case Function::LinkOnceODRLinkage: - O << "\t.global\t" << *CurrentFnSym << '\n'; - O << "\t.weak\t" << *CurrentFnSym << '\n'; - break; - } - - printVisibility(CurrentFnSym, F->getVisibility()); - - EmitAlignment(MF.getAlignment(), F); - - if (Subtarget.isPPC64()) { - // Emit an official procedure descriptor. - // FIXME 64-bit SVR4: Use MCSection here! - O << "\t.section\t\".opd\",\"aw\"\n"; - O << "\t.align 3\n"; - O << *CurrentFnSym << ":\n"; - O << "\t.quad .L." << *CurrentFnSym << ",.TOC.@tocbase\n"; - O << "\t.previous\n"; - O << ".L." << *CurrentFnSym << ":\n"; - } else { - O << *CurrentFnSym << ":\n"; - } - - // Emit pre-function debug information. - DW->BeginFunction(&MF); - - // Print out code for the function. - for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); - I != E; ++I) { - // Print a label for the basic block. - if (I != MF.begin()) { - EmitBasicBlockStart(I); - } - for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); - II != E; ++II) { - // Print the assembly for the instruction. - printMachineInstruction(II); - } - } - - O << "\t.size\t" << *CurrentFnSym << ",.-" << *CurrentFnSym << '\n'; - - OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); - - // Emit post-function debug information. - DW->EndFunction(&MF); - - // Print out jump tables referenced by the function. - EmitJumpTableInfo(MF.getJumpTableInfo(), MF); - - // We didn't modify anything. - return false; +void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { + if (!Subtarget.isPPC64()) // linux/ppc32 - Normal entry label. + return AsmPrinter::EmitFunctionEntryLabel(); + + // Emit an official procedure descriptor. + // FIXME 64-bit SVR4: Use MCSection here! + O << "\t.section\t\".opd\",\"aw\"\n"; + O << "\t.align 3\n"; + OutStreamer.EmitLabel(CurrentFnSym); + O << "\t.quad .L." << *CurrentFnSym << ",.TOC.@tocbase\n"; + O << "\t.previous\n"; + O << ".L." << *CurrentFnSym << ":\n"; } + bool PPCLinuxAsmPrinter::doFinalization(Module &M) { const TargetData *TD = TM.getTargetData(); @@ -697,81 +624,6 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) { return AsmPrinter::doFinalization(M); } -/// runOnMachineFunction - This uses the printMachineInstruction() -/// method to print assembly for each instruction. -/// -bool PPCDarwinAsmPrinter::runOnMachineFunction(MachineFunction &MF) { - this->MF = &MF; - - SetupMachineFunction(MF); - O << "\n\n"; - - // Print out constants referenced by the function - EmitConstantPool(MF.getConstantPool()); - - // Print out labels for the function. - const Function *F = MF.getFunction(); - OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); - - switch (F->getLinkage()) { - default: llvm_unreachable("Unknown linkage type!"); - case Function::PrivateLinkage: - case Function::InternalLinkage: // Symbols default to internal. 
- break; - case Function::ExternalLinkage: - O << "\t.globl\t" << *CurrentFnSym << '\n'; - break; - case Function::WeakAnyLinkage: - case Function::WeakODRLinkage: - case Function::LinkOnceAnyLinkage: - case Function::LinkOnceODRLinkage: - case Function::LinkerPrivateLinkage: - O << "\t.globl\t" << *CurrentFnSym << '\n'; - O << "\t.weak_definition\t" << *CurrentFnSym << '\n'; - break; - } - - printVisibility(CurrentFnSym, F->getVisibility()); - - EmitAlignment(MF.getAlignment(), F); - O << *CurrentFnSym << ":\n"; - - // Emit pre-function debug information. - DW->BeginFunction(&MF); - - // If the function is empty, then we need to emit *something*. Otherwise, the - // function's label might be associated with something that it wasn't meant to - // be associated with. We emit a noop in this situation. - MachineFunction::iterator I = MF.begin(); - - if (++I == MF.end() && MF.front().empty()) - O << "\tnop\n"; - - // Print out code for the function. - for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); - I != E; ++I) { - // Print a label for the basic block. - if (I != MF.begin()) { - EmitBasicBlockStart(I); - } - for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); - II != IE; ++II) { - // Print the assembly for the instruction. - printMachineInstruction(II); - } - } - - // Emit post-function debug information. - DW->EndFunction(&MF); - - // Print out jump tables referenced by the function. - EmitJumpTableInfo(MF.getJumpTableInfo(), MF); - - // We didn't modify anything. - return false; -} - - void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { static const char *const CPUDirectives[] = { "", @@ -928,9 +780,8 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { for (std::vector<Function *>::const_iterator I = Personalities.begin(), E = Personalities.end(); I != E; ++I) { if (*I) { - const MCSymbol *NLPSym = - GetSymbolWithGlobalValueBase(*I, "$non_lazy_ptr"); - const MCSymbol *&StubSym = MMIMacho.getGVStubEntry(NLPSym); + MCSymbol *NLPSym = GetSymbolWithGlobalValueBase(*I, "$non_lazy_ptr"); + MCSymbol *&StubSym = MMIMacho.getGVStubEntry(NLPSym); StubSym = GetGlobalValueSymbol(*I); } } @@ -981,13 +832,13 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { /// static AsmPrinter *createPPCAsmPrinterPass(formatted_raw_ostream &o, TargetMachine &tm, - const MCAsmInfo *tai, - bool verbose) { + MCContext &Ctx, MCStreamer &Streamer, + const MCAsmInfo *tai) { const PPCSubtarget *Subtarget = &tm.getSubtarget<PPCSubtarget>(); if (Subtarget->isDarwin()) - return new PPCDarwinAsmPrinter(o, tm, tai, verbose); - return new PPCLinuxAsmPrinter(o, tm, tai, verbose); + return new PPCDarwinAsmPrinter(o, tm, Ctx, Streamer, tai); + return new PPCLinuxAsmPrinter(o, tm, Ctx, Streamer, tai); } // Force static initialization. diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt index bdd6d36..c997c5c 100644 --- a/lib/Target/PowerPC/CMakeLists.txt +++ b/lib/Target/PowerPC/CMakeLists.txt @@ -19,7 +19,6 @@ add_llvm_target(PowerPCCodeGen PPCISelDAGToDAG.cpp PPCISelLowering.cpp PPCJITInfo.cpp - PPCMachOWriterInfo.cpp PPCMCAsmInfo.cpp PPCPredicates.cpp PPCRegisterInfo.cpp diff --git a/lib/Target/PowerPC/Makefile b/lib/Target/PowerPC/Makefile index cd30011..1265f1d 100644 --- a/lib/Target/PowerPC/Makefile +++ b/lib/Target/PowerPC/Makefile @@ -10,7 +10,6 @@ LEVEL = ../../.. LIBRARYNAME = LLVMPowerPCCodeGen TARGET = PPC -CXXFLAGS = -fno-rtti # Make sure that tblgen is run, first thing. 
BUILT_SOURCES = PPCGenInstrNames.inc PPCGenRegisterNames.inc \ diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h index 7b98268..67e3a4a 100644 --- a/lib/Target/PowerPC/PPC.h +++ b/lib/Target/PowerPC/PPC.h @@ -23,18 +23,12 @@ namespace llvm { class PPCTargetMachine; class FunctionPass; - class MachineCodeEmitter; - class ObjectCodeEmitter; class formatted_raw_ostream; FunctionPass *createPPCBranchSelectionPass(); FunctionPass *createPPCISelDag(PPCTargetMachine &TM); -FunctionPass *createPPCCodeEmitterPass(PPCTargetMachine &TM, - MachineCodeEmitter &MCE); FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM, JITCodeEmitter &MCE); -FunctionPass *createPPCObjectCodeEmitterPass(PPCTargetMachine &TM, - ObjectCodeEmitter &OCE); extern Target ThePPC32Target; extern Target ThePPC64Target; diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td index c7ce171..155fba2 100644 --- a/lib/Target/PowerPC/PPCCallingConv.td +++ b/lib/Target/PowerPC/PPCCallingConv.td @@ -66,28 +66,13 @@ def CC_PPC : CallingConv<[ // PowerPC System V Release 4 ABI //===----------------------------------------------------------------------===// -// _Complex arguments are never split, thus their two scalars are either -// passed both in argument registers or both on the stack. Also _Complex -// arguments are always passed in general purpose registers, never in -// Floating-point registers or vector registers. Arguments which should go -// on the stack are marked with the inreg parameter attribute. -// Giving inreg this target-dependent (and counter-intuitive) meaning -// simplifies things, because functions calls are not always coming from the -// frontend but are also created implicitly e.g. for libcalls. If inreg would -// actually mean that the argument is passed in a register, then all places -// which create function calls/function definitions implicitly would need to -// be aware of this fact and would need to mark arguments accordingly. With -// inreg meaning that the argument is passed on the stack, this is not an -// issue, except for calls which involve _Complex types. - def CC_PPC_SVR4_Common : CallingConv<[ // The ABI requires i64 to be passed in two adjacent registers with the first // register having an odd register number. CCIfType<[i32], CCIfSplit<CCCustom<"CC_PPC_SVR4_Custom_AlignArgRegs">>>, // The first 8 integer arguments are passed in integer registers. - CCIfType<[i32], CCIf<"!ArgFlags.isInReg()", - CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>>, + CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>, // Make sure the i64 words from a long double are either both passed in // registers or both passed on the stack. 
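Aside on the CCIfSplit<CCCustom<"CC_PPC_SVR4_Custom_AlignArgRegs">> rule kept in the hunk above: the string names a C++ handler (living in PPCISelLowering.cpp) that enforces the ABI requirement that a split i64 start in an odd-numbered GPR. The following is a minimal sketch of such a handler, assuming the CCState/CCCustomFn interfaces of this vintage of LLVM; it is illustrative, not the verbatim tree code:

    static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, EVT &ValVT,
                                                EVT &LocVT,
                                                CCValAssign::LocInfo &LocInfo,
                                                ISD::ArgFlagsTy &ArgFlags,
                                                CCState &State) {
      // 32-bit SVR4 argument GPRs in allocation order (R3 is odd-numbered).
      static const unsigned ArgRegs[] = {
        PPC::R3, PPC::R4, PPC::R5, PPC::R6,
        PPC::R7, PPC::R8, PPC::R9, PPC::R10
      };
      const unsigned NumArgRegs = sizeof(ArgRegs) / sizeof(ArgRegs[0]);

      unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);
      // An odd index into ArgRegs is an even-numbered register (R4, R6, ...),
      // so burn one register to land the split i64 on an odd-numbered one.
      if (RegNum != NumArgRegs && RegNum % 2 == 1)
        State.AllocateReg(ArgRegs[RegNum]);

      // Return false so the normal CCAssignToReg rules still place both
      // halves; this hook only aligns the starting register.
      return false;
    }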
diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp index da9ea36..327470d 100644 --- a/lib/Target/PowerPC/PPCCodeEmitter.cpp +++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp @@ -17,26 +17,34 @@ #include "PPC.h" #include "llvm/Module.h" #include "llvm/PassManager.h" -#include "llvm/CodeGen/MachineCodeEmitter.h" #include "llvm/CodeGen/JITCodeEmitter.h" -#include "llvm/CodeGen/ObjectCodeEmitter.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" using namespace llvm; namespace { - class PPCCodeEmitter { + class PPCCodeEmitter : public MachineFunctionPass { TargetMachine &TM; - MachineCodeEmitter &MCE; + JITCodeEmitter &MCE; + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineModuleInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + static char ID; + + /// MovePCtoLROffset - When/if we see a MovePCtoLR instruction, we record + /// its address in the function into this pointer. + void *MovePCtoLROffset; public: - PPCCodeEmitter(TargetMachine &tm, MachineCodeEmitter &mce): - TM(tm), MCE(mce) {} + + PPCCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) + : MachineFunctionPass(&ID), TM(tm), MCE(mce) {} /// getBinaryCodeForInstr - This function, generated by the /// CodeEmitterGenerator using TableGen, produces the binary encoding for @@ -49,27 +57,6 @@ namespace { unsigned getMachineOpValue(const MachineInstr &MI, const MachineOperand &MO); - /// MovePCtoLROffset - When/if we see a MovePCtoLR instruction, we record - /// its address in the function into this pointer. - - void *MovePCtoLROffset; - }; - - template <class CodeEmitter> - class Emitter : public MachineFunctionPass, public PPCCodeEmitter { - TargetMachine &TM; - CodeEmitter &MCE; - - void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<MachineModuleInfo>(); - MachineFunctionPass::getAnalysisUsage(AU); - } - - public: - static char ID; - Emitter(TargetMachine &tm, CodeEmitter &mce) - : MachineFunctionPass(&ID), PPCCodeEmitter(tm, mce), TM(tm), MCE(mce) {} - const char *getPassName() const { return "PowerPC Machine Code Emitter"; } /// runOnMachineFunction - emits the given MachineFunction to memory @@ -84,31 +71,18 @@ namespace { /// unsigned getValueBit(int64_t Val, unsigned bit) { return (Val >> bit) & 1; } }; - - template <class CodeEmitter> - char Emitter<CodeEmitter>::ID = 0; } +char PPCCodeEmitter::ID = 0; + /// createPPCCodeEmitterPass - Return a pass that emits the collected PPC code /// to the specified MCE object. 
- -FunctionPass *llvm::createPPCCodeEmitterPass(PPCTargetMachine &TM, - MachineCodeEmitter &MCE) { - return new Emitter<MachineCodeEmitter>(TM, MCE); -} - FunctionPass *llvm::createPPCJITCodeEmitterPass(PPCTargetMachine &TM, JITCodeEmitter &JCE) { - return new Emitter<JITCodeEmitter>(TM, JCE); -} - -FunctionPass *llvm::createPPCObjectCodeEmitterPass(PPCTargetMachine &TM, - ObjectCodeEmitter &OCE) { - return new Emitter<ObjectCodeEmitter>(TM, OCE); + return new PPCCodeEmitter(TM, JCE); } -template <class CodeEmitter> -bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) { +bool PPCCodeEmitter::runOnMachineFunction(MachineFunction &MF) { assert((MF.getTarget().getRelocationModel() != Reloc::Default || MF.getTarget().getRelocationModel() != Reloc::Static) && "JIT relocation model must be set to static or default!"); @@ -124,8 +98,7 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) { return false; } -template <class CodeEmitter> -void Emitter<CodeEmitter>::emitBasicBlock(MachineBasicBlock &MBB) { +void PPCCodeEmitter::emitBasicBlock(MachineBasicBlock &MBB) { MCE.StartMachineBasicBlock(&MBB); for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I){ @@ -135,12 +108,12 @@ void Emitter<CodeEmitter>::emitBasicBlock(MachineBasicBlock &MBB) { default: MCE.emitWordBE(getBinaryCodeForInstr(MI)); break; - case TargetInstrInfo::DBG_LABEL: - case TargetInstrInfo::EH_LABEL: + case TargetOpcode::DBG_LABEL: + case TargetOpcode::EH_LABEL: MCE.emitLabel(MI.getOperand(0).getImm()); break; - case TargetInstrInfo::IMPLICIT_DEF: - case TargetInstrInfo::KILL: + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::KILL: break; // pseudo opcode, no side effects case PPC::MovePCtoLR: case PPC::MovePCtoLR8: diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp index 6af7e0f..3a15f7e 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp +++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp @@ -118,7 +118,7 @@ isLoadOfStoredAddress(unsigned LoadSize, SDValue Ptr1, SDValue Ptr2) const { } /// getHazardType - We return hazard for any non-branch instruction that would -/// terminate terminate the dispatch group. We turn NoopHazard for any +/// terminate the dispatch group. We turn NoopHazard for any /// instructions that wouldn't terminate the dispatch group that would cause a /// pipeline flush. ScheduleHazardRecognizer::HazardType PPCHazardRecognizer970:: diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 32c1879..004997f 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -199,7 +199,7 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) { // Check to see if this function uses vector registers, which means we have to // save and restore the VRSAVE register and update it with the regs we use. // - // In this case, there will be virtual registers of vector type type created + // In this case, there will be virtual registers of vector type created // by the scheduler. Detect them now. 
bool HasVectorVReg = false; for (unsigned i = TargetRegisterInfo::FirstVirtualRegister, diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 8248c94..e73af56 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -25,13 +25,13 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CallingConv.h" #include "llvm/Constants.h" #include "llvm/Function.h" #include "llvm/Intrinsics.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -1243,7 +1243,8 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, // If the global is weak or external, we have to go through the lazy // resolution stub. - return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Lo, NULL, 0); + return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Lo, NULL, 0, + false, false, 0); } SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { @@ -1355,7 +1356,8 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); - return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0); + return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0, + false, false, 0); } // For the 32-bit SVR4 ABI we follow the layout of the va_list struct. @@ -1405,25 +1407,29 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, // Store first byte : number of int regs SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, - Op.getOperand(1), SV, 0, MVT::i8); + Op.getOperand(1), SV, 0, MVT::i8, + false, false, 0); uint64_t nextOffset = FPROffset; SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1), ConstFPROffset); // Store second byte : number of float regs SDValue secondStore = - DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr, SV, nextOffset, MVT::i8); + DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr, SV, nextOffset, MVT::i8, + false, false, 0); nextOffset += StackOffset; nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset); // Store second word : arguments given on stack SDValue thirdStore = - DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, SV, nextOffset); + DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, SV, nextOffset, + false, false, 0); nextOffset += FrameOffset; nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset); // Store third word : arguments given in registers - return DAG.getStore(thirdStore, dl, FR, nextPtr, SV, nextOffset); + return DAG.getStore(thirdStore, dl, FR, nextPtr, SV, nextOffset, + false, false, 0); } @@ -1572,7 +1578,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4( EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); // Potential tail calls could cause overwriting of argument stack slots. - bool isImmutable = !(PerformTailCallOpt && (CallConv==CallingConv::Fast)); + bool isImmutable = !(GuaranteedTailCallOpt && (CallConv==CallingConv::Fast)); unsigned PtrByteSize = 4; // Assign locations to all of the incoming arguments. 
@@ -1628,7 +1634,8 @@ PPCTargetLowering::LowerFormalArguments_SVR4( // Create load nodes to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, PtrVT); - InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0)); + InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0, + false, false, 0)); } } @@ -1700,7 +1707,8 @@ PPCTargetLowering::LowerFormalArguments_SVR4( unsigned GPRIndex = 0; for (; GPRIndex != VarArgsNumGPR; ++GPRIndex) { SDValue Val = DAG.getRegister(GPArgRegs[GPRIndex], PtrVT); - SDValue Store = DAG.getStore(Chain, dl, Val, FIN, NULL, 0); + SDValue Store = DAG.getStore(Chain, dl, Val, FIN, NULL, 0, + false, false, 0); MemOps.push_back(Store); // Increment the address by four for the next argument to store SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); @@ -1714,7 +1722,8 @@ PPCTargetLowering::LowerFormalArguments_SVR4( unsigned VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0, + false, false, 0); MemOps.push_back(Store); // Increment the address by four for the next argument to store SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); @@ -1729,7 +1738,8 @@ PPCTargetLowering::LowerFormalArguments_SVR4( unsigned FPRIndex = 0; for (FPRIndex = 0; FPRIndex != VarArgsNumFPR; ++FPRIndex) { SDValue Val = DAG.getRegister(FPArgRegs[FPRIndex], MVT::f64); - SDValue Store = DAG.getStore(Chain, dl, Val, FIN, NULL, 0); + SDValue Store = DAG.getStore(Chain, dl, Val, FIN, NULL, 0, + false, false, 0); MemOps.push_back(Store); // Increment the address by eight for the next argument to store SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8, @@ -1741,7 +1751,8 @@ PPCTargetLowering::LowerFormalArguments_SVR4( unsigned VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64); - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0, + false, false, 0); MemOps.push_back(Store); // Increment the address by eight for the next argument to store SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8, @@ -1773,7 +1784,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); bool isPPC64 = PtrVT == MVT::i64; // Potential tail calls could cause overwriting of argument stack slots. - bool isImmutable = !(PerformTailCallOpt && (CallConv==CallingConv::Fast)); + bool isImmutable = !(GuaranteedTailCallOpt && (CallConv==CallingConv::Fast)); unsigned PtrByteSize = isPPC64 ? 8 : 4; unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, true); @@ -1903,7 +1914,9 @@ PPCTargetLowering::LowerFormalArguments_Darwin( unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, - NULL, 0, ObjSize==1 ? MVT::i8 : MVT::i16 ); + NULL, 0, + ObjSize==1 ? 
MVT::i8 : MVT::i16, + false, false, 0); MemOps.push_back(Store); ++GPR_idx; } @@ -1921,7 +1934,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin( int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true, false); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0, + false, false, 0); MemOps.push_back(Store); ++GPR_idx; ArgOffset += PtrByteSize; @@ -2045,7 +2059,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin( CurArgOffset + (ArgSize - ObjSize), isImmutable, false); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); - ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0); + ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0, + false, false, 0); } InVals.push_back(ArgVal); @@ -2091,7 +2106,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin( VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0, + false, false, 0); MemOps.push_back(Store); // Increment the address by four for the next argument to store SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); @@ -2164,7 +2180,7 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, PPCFrameInfo::getMinCallFrameSize(isPPC64, true)); // Tail call needs the stack to be aligned. - if (CC==CallingConv::Fast && PerformTailCallOpt) { + if (CC==CallingConv::Fast && GuaranteedTailCallOpt) { unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()-> getStackAlignment(); unsigned AlignMask = TargetAlign-1; @@ -2200,6 +2216,9 @@ PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG& DAG) const { + if (!GuaranteedTailCallOpt) + return false; + // Variable argument functions are not supported. if (isVarArg) return false; @@ -2268,7 +2287,7 @@ StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, // Store relative to framepointer. MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN, PseudoSourceValue::getFixedStack(FI), - 0)); + 0, false, false, 0)); } } @@ -2294,7 +2313,8 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, EVT VT = isPPC64 ? MVT::i64 : MVT::i32; SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT); Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx, - PseudoSourceValue::getFixedStack(NewRetAddr), 0); + PseudoSourceValue::getFixedStack(NewRetAddr), 0, + false, false, 0); // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack // slot as the FP is never overwritten. @@ -2305,7 +2325,8 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, true, false); SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT); Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx, - PseudoSourceValue::getFixedStack(NewFPIdx), 0); + PseudoSourceValue::getFixedStack(NewFPIdx), 0, + false, false, 0); } } return Chain; @@ -2343,14 +2364,16 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, // Load the LR and FP stack slot for later adjusting. EVT VT = PPCSubTarget.isPPC64() ? 
MVT::i64 : MVT::i32; LROpOut = getReturnAddrFrameIndex(DAG); - LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, NULL, 0); + LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, NULL, 0, + false, false, 0); Chain = SDValue(LROpOut.getNode(), 1); // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack // slot as the FP is never overwritten. if (isDarwinABI) { FPOpOut = getFramePointerFrameIndex(DAG); - FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, NULL, 0); + FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, NULL, 0, + false, false, 0); Chain = SDValue(FPOpOut.getNode(), 1); } } @@ -2392,7 +2415,8 @@ LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, DAG.getConstant(ArgOffset, PtrVT)); } - MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0)); + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, + false, false, 0)); // Calculate and remember argument location. } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset, TailCallArguments); @@ -2601,7 +2625,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, // the stack. Account for this here so these bytes can be pushed back on in // PPCRegisterInfo::eliminateCallFramePseudoInstr. int BytesCalleePops = - (CallConv==CallingConv::Fast && PerformTailCallOpt) ? NumBytes : 0; + (CallConv==CallingConv::Fast && GuaranteedTailCallOpt) ? NumBytes : 0; if (InFlag.getNode()) Ops.push_back(InFlag); @@ -2673,11 +2697,15 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, SDValue PPCTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, + bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) { + if (isTailCall) + isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, + Ins, DAG); + if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) { return LowerCall_SVR4(Chain, Callee, CallConv, isVarArg, isTailCall, Outs, Ins, @@ -2700,10 +2728,6 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, // See PPCTargetLowering::LowerFormalArguments_SVR4() for a description // of the 32-bit SVR4 ABI stack frame layout. - assert((!isTailCall || - (CallConv == CallingConv::Fast && PerformTailCallOpt)) && - "IsEligibleForTailCallOptimization missed a case!"); - assert((CallConv == CallingConv::C || CallConv == CallingConv::Fast) && "Unknown calling convention!"); @@ -2717,7 +2741,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, // and restoring the callers stack pointer in this functions epilog. This is // done because by tail calling the called function might overwrite the value // in this function's (MF) stack pointer stack slot 0(SP). - if (PerformTailCallOpt && CallConv==CallingConv::Fast) + if (GuaranteedTailCallOpt && CallConv==CallingConv::Fast) MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); // Count how many bytes are to be pushed on the stack, including the linkage @@ -2859,7 +2883,8 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, - PseudoSourceValue::getStack(), LocMemOffset)); + PseudoSourceValue::getStack(), LocMemOffset, + false, false, 0)); } else { // Calculate and remember argument location. 
CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset, @@ -2920,7 +2945,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, // and restoring the callers stack pointer in this functions epilog. This is // done because by tail calling the called function might overwrite the value // in this function's (MF) stack pointer stack slot 0(SP). - if (PerformTailCallOpt && CallConv==CallingConv::Fast) + if (GuaranteedTailCallOpt && CallConv==CallingConv::Fast) MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); unsigned nAltivecParamsAtEnd = 0; @@ -3021,7 +3046,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, EVT VT = (Size==1) ? MVT::i8 : MVT::i16; if (GPR_idx != NumGPRs) { SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, - NULL, 0, VT); + NULL, 0, VT, false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); @@ -3058,7 +3083,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, SDValue Const = DAG.getConstant(j, PtrOff.getValueType()); SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); if (GPR_idx != NumGPRs) { - SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, NULL, 0); + SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, NULL, 0, + false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); ArgOffset += PtrByteSize; @@ -3089,19 +3115,22 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); if (isVarArg) { - SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0); + SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, + false, false, 0); MemOpChains.push_back(Store); // Float varargs are always shadowed in available integer registers if (GPR_idx != NumGPRs) { - SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0); + SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0, + false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); } if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){ SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType()); PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); - SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0); + SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0, + false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); } @@ -3144,10 +3173,12 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, // entirely in R registers. Maybe later. 
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, DAG.getConstant(ArgOffset, PtrVT)); - SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0); + SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, + false, false, 0); MemOpChains.push_back(Store); if (VR_idx != NumVRs) { - SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, NULL, 0); + SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, NULL, 0, + false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load)); } @@ -3157,7 +3188,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, break; SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, DAG.getConstant(i, PtrVT)); - SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, NULL, 0); + SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, NULL, 0, + false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); } @@ -3222,7 +3254,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, // TOC save area offset. SDValue PtrOff = DAG.getIntPtrConstant(40); SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); - Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, NULL, 0); + Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, NULL, 0, + false, false, 0); } // Build a sequence of copy-to-reg nodes chained together with token chain @@ -3297,13 +3330,15 @@ SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, SDValue SaveSP = Op.getOperand(1); // Load the old link SP. - SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr, NULL, 0); + SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr, NULL, 0, + false, false, 0); // Restore the stack pointer. Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP); // Store the old link SP. - return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, NULL, 0); + return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, NULL, 0, + false, false, 0); } @@ -3480,14 +3515,16 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, SDValue FIPtr = DAG.CreateStackTemporary(MVT::f64); // Emit a store to the stack slot. - SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, NULL, 0); + SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, NULL, 0, + false, false, 0); // Result is a load from the stack slot. If loading 4 bytes, make sure to // add in a bias. if (Op.getValueType() == MVT::i32) FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, DAG.getConstant(4, FIPtr.getValueType())); - return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, NULL, 0); + return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, NULL, 0, + false, false, 0); } SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { @@ -3530,7 +3567,7 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { DAG.getMemIntrinsicNode(PPCISD::STD_32, dl, DAG.getVTList(MVT::Other), Ops, 4, MVT::i64, MMO); // Load the value as a double. - SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, NULL, 0); + SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, NULL, 0, false, false, 0); // FCFID it and return it. 
SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld); @@ -3575,12 +3612,13 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) { int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, - StackSlot, NULL, 0); + StackSlot, NULL, 0, false, false, 0); // Load FP Control Word from low 32 bits of stack slot. SDValue Four = DAG.getConstant(4, PtrVT); SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four); - SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, NULL, 0); + SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, NULL, 0, + false, false, 0); // Transform as necessary SDValue CWD1 = @@ -4246,9 +4284,11 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, // Store the input value into Value#0 of the stack slot. SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, - Op.getOperand(0), FIdx, NULL, 0); + Op.getOperand(0), FIdx, NULL, 0, + false, false, 0); // Load it out. - return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, NULL, 0); + return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, NULL, 0, + false, false, 0); } SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) { @@ -5457,7 +5497,8 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { // to the stack. FuncInfo->setLRStoreRequired(); return DAG.getLoad(getPointerTy(), dl, - DAG.getEntryNode(), RetAddrFI, NULL, 0); + DAG.getEntryNode(), RetAddrFI, NULL, 0, + false, false, 0); } SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index cf81395..9c390ac 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -345,13 +345,6 @@ namespace llvm { /// the offset of the target addressing mode. virtual bool isLegalAddressImmediate(GlobalValue *GV) const; - virtual bool - IsEligibleForTailCallOptimization(SDValue Callee, - CallingConv::ID CalleeCC, - bool isVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, - SelectionDAG& DAG) const; - virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; virtual EVT getOptimalMemOpType(uint64_t Size, unsigned Align, @@ -365,6 +358,13 @@ namespace llvm { SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const; SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const; + bool + IsEligibleForTailCallOptimization(SDValue Callee, + CallingConv::ID CalleeCC, + bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + SelectionDAG& DAG) const; + SDValue EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, int SPDiff, SDValue Chain, @@ -431,7 +431,7 @@ namespace llvm { virtual SDValue LowerCall(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, bool isTailCall, + CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index af7d812..3db623a 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -421,22 +421,30 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, FrameIdx)); return true; } else { - // FIXME: We use R0 here, because it isn't available for RA. We need to - // store the CR in the low 4-bits of the saved value. 
First, issue a MFCR - // to save all of the CRBits. - NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFCR), PPC::R0)); + // FIXME: We need a scratch reg here. The trouble with using R0 is that + // it's possible for the stack frame to be so big the save location is + // out of range of immediate offsets, necessitating another register. + // We hack this on Darwin by reserving R2. It's probably broken on Linux + // at the moment. + + // We need to store the CR in the low 4-bits of the saved value. First, + // issue a MFCR to save all of the CRBits. + unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ? + PPC::R2 : PPC::R0; + NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFCR), ScratchReg)); // If the saved register wasn't CR0, shift the bits left so that they are // in CR0's slot. if (SrcReg != PPC::CR0) { unsigned ShiftBits = PPCRegisterInfo::getRegisterNumbering(SrcReg)*4; - // rlwinm r0, r0, ShiftBits, 0, 31. - NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), PPC::R0) - .addReg(PPC::R0).addImm(ShiftBits).addImm(0).addImm(31)); + // rlwinm scratch, scratch, ShiftBits, 0, 31. + NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg) + .addReg(ScratchReg).addImm(ShiftBits) + .addImm(0).addImm(31)); } NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW)) - .addReg(PPC::R0, + .addReg(ScratchReg, getKillRegState(isKill)), FrameIdx)); } @@ -540,20 +548,28 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg), FrameIdx)); } else if (RC == PPC::CRRCRegisterClass) { - // FIXME: We use R0 here, because it isn't available for RA. - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), PPC::R0), - FrameIdx)); + // FIXME: We need a scratch reg here. The trouble with using R0 is that + // it's possible for the stack frame to be so big the save location is + // out of range of immediate offsets, necessitating another register. + // We hack this on Darwin by reserving R2. It's probably broken on Linux + // at the moment. + unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ? + PPC::R2 : PPC::R0; + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), + ScratchReg), FrameIdx)); // If the reloaded register isn't CR0, shift the bits right so that they are // in the right CR's slot. if (DestReg != PPC::CR0) { unsigned ShiftBits = PPCRegisterInfo::getRegisterNumbering(DestReg)*4; // rlwinm r11, r11, 32-ShiftBits, 0, 31. - NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), PPC::R0) - .addReg(PPC::R0).addImm(32-ShiftBits).addImm(0).addImm(31)); + NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg) + .addReg(ScratchReg).addImm(32-ShiftBits).addImm(0) + .addImm(31)); } - NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTCRF), DestReg).addReg(PPC::R0)); + NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTCRF), DestReg) + .addReg(ScratchReg)); } else if (RC == PPC::CRBITRCRegisterClass) { unsigned Reg = 0; diff --git a/lib/Target/PowerPC/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/PPCMCAsmInfo.cpp index d2ff3b7..b37aee8 100644 --- a/lib/Target/PowerPC/PPCMCAsmInfo.cpp +++ b/lib/Target/PowerPC/PPCMCAsmInfo.cpp @@ -26,6 +26,9 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) { } PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) { + // ".comm align is in bytes but .align is pow-2."
+ AlignmentIsInBytes = false; + CommentString = "#"; GlobalPrefix = ""; PrivateGlobalPrefix = ".L"; @@ -49,9 +52,7 @@ PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) { AbsoluteEHSectionOffsets = false; ZeroDirective = "\t.space\t"; - SetDirective = "\t.set"; Data64bitsDirective = is64Bit ? "\t.quad\t" : 0; - AlignmentIsInBytes = false; HasLCOMMDirective = true; AssemblerDialect = 0; // Old-Style mnemonics. } diff --git a/lib/Target/PowerPC/PPCMachOWriterInfo.cpp b/lib/Target/PowerPC/PPCMachOWriterInfo.cpp deleted file mode 100644 index 4c14454..0000000 --- a/lib/Target/PowerPC/PPCMachOWriterInfo.cpp +++ /dev/null @@ -1,152 +0,0 @@ -//===-- PPCMachOWriterInfo.cpp - Mach-O Writer Info for the PowerPC -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements Mach-O writer information for the PowerPC backend. -// -//===----------------------------------------------------------------------===// - -#include "PPCMachOWriterInfo.h" -#include "PPCRelocations.h" -#include "PPCTargetMachine.h" -#include "llvm/CodeGen/MachORelocation.h" -#include "llvm/Support/OutputBuffer.h" -#include "llvm/Support/ErrorHandling.h" -#include <cstdio> -using namespace llvm; - -PPCMachOWriterInfo::PPCMachOWriterInfo(const PPCTargetMachine &TM) - : TargetMachOWriterInfo(TM.getTargetData()->getPointerSizeInBits() == 64 ? - HDR_CPU_TYPE_POWERPC64 : - HDR_CPU_TYPE_POWERPC, - HDR_CPU_SUBTYPE_POWERPC_ALL) {} -PPCMachOWriterInfo::~PPCMachOWriterInfo() {} - -/// GetTargetRelocation - For the MachineRelocation MR, convert it to one or -/// more PowerPC MachORelocation(s), add the new relocations to the -/// MachOSection, and rewrite the instruction at the section offset if required -/// by that relocation type. -unsigned PPCMachOWriterInfo::GetTargetRelocation(MachineRelocation &MR, - unsigned FromIdx, - unsigned ToAddr, - unsigned ToIdx, - OutputBuffer &RelocOut, - OutputBuffer &SecOut, - bool Scattered, - bool isExtern) const { - unsigned NumRelocs = 0; - uint64_t Addr = 0; - - // Get the address of whatever it is we're relocating, if possible. - if (!isExtern) - Addr = (uintptr_t)MR.getResultPointer() + ToAddr; - - switch ((PPC::RelocationType)MR.getRelocationType()) { - default: llvm_unreachable("Unknown PPC relocation type!"); - case PPC::reloc_absolute_low_ix: - llvm_unreachable("Unhandled PPC relocation type!"); - break; - case PPC::reloc_vanilla: - { - // FIXME: need to handle 64 bit vanilla relocs - MachORelocation VANILLA(MR.getMachineCodeOffset(), ToIdx, - false, 2, isExtern, - PPC_RELOC_VANILLA, - Scattered, (intptr_t)MR.getResultPointer()); - ++NumRelocs; - - if (Scattered) { - RelocOut.outword(VANILLA.getPackedFields()); - RelocOut.outword(VANILLA.getAddress()); - } else { - RelocOut.outword(VANILLA.getAddress()); - RelocOut.outword(VANILLA.getPackedFields()); - } - - intptr_t SymbolOffset; - - if (Scattered) - SymbolOffset = Addr + MR.getConstantVal(); - else - SymbolOffset = Addr; - - printf("vanilla fixup: sec_%x[%x] = %x\n", FromIdx, - unsigned(MR.getMachineCodeOffset()), - unsigned(SymbolOffset)); - SecOut.fixword(SymbolOffset, MR.getMachineCodeOffset()); - } - break; - case PPC::reloc_pcrel_bx: - { - // FIXME: Presumably someday we will need to branch to other, non-extern - // functions too. 
Need to figure out some way to distinguish between - // target is BB and target is function. - if (isExtern) { - MachORelocation BR24(MR.getMachineCodeOffset(), ToIdx, true, 2, - isExtern, PPC_RELOC_BR24, Scattered, - (intptr_t)MR.getMachineCodeOffset()); - RelocOut.outword(BR24.getAddress()); - RelocOut.outword(BR24.getPackedFields()); - ++NumRelocs; - } - - Addr -= MR.getMachineCodeOffset(); - Addr >>= 2; - Addr &= 0xFFFFFF; - Addr <<= 2; - Addr |= (SecOut[MR.getMachineCodeOffset()] << 24); - Addr |= (SecOut[MR.getMachineCodeOffset()+3] & 0x3); - SecOut.fixword(Addr, MR.getMachineCodeOffset()); - break; - } - case PPC::reloc_pcrel_bcx: - { - Addr -= MR.getMachineCodeOffset(); - Addr &= 0xFFFC; - - SecOut.fixhalf(Addr, MR.getMachineCodeOffset() + 2); - break; - } - case PPC::reloc_absolute_high: - { - MachORelocation HA16(MR.getMachineCodeOffset(), ToIdx, false, 2, - isExtern, PPC_RELOC_HA16); - MachORelocation PAIR(Addr & 0xFFFF, 0xFFFFFF, false, 2, isExtern, - PPC_RELOC_PAIR); - NumRelocs = 2; - - RelocOut.outword(HA16.getRawAddress()); - RelocOut.outword(HA16.getPackedFields()); - RelocOut.outword(PAIR.getRawAddress()); - RelocOut.outword(PAIR.getPackedFields()); - - Addr += 0x8000; - - SecOut.fixhalf(Addr >> 16, MR.getMachineCodeOffset() + 2); - break; - } - case PPC::reloc_absolute_low: - { - MachORelocation LO16(MR.getMachineCodeOffset(), ToIdx, false, 2, - isExtern, PPC_RELOC_LO16); - MachORelocation PAIR(Addr >> 16, 0xFFFFFF, false, 2, isExtern, - PPC_RELOC_PAIR); - NumRelocs = 2; - - RelocOut.outword(LO16.getRawAddress()); - RelocOut.outword(LO16.getPackedFields()); - RelocOut.outword(PAIR.getRawAddress()); - RelocOut.outword(PAIR.getPackedFields()); - - SecOut.fixhalf(Addr, MR.getMachineCodeOffset() + 2); - break; - } - } - - return NumRelocs; -} diff --git a/lib/Target/PowerPC/PPCMachOWriterInfo.h b/lib/Target/PowerPC/PPCMachOWriterInfo.h deleted file mode 100644 index d46334d..0000000 --- a/lib/Target/PowerPC/PPCMachOWriterInfo.h +++ /dev/null @@ -1,55 +0,0 @@ -//===-- PPCMachOWriterInfo.h - Mach-O Writer Info for PowerPC ---*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements Mach-O writer information for the PowerPC backend. -// -//===----------------------------------------------------------------------===// - -#ifndef PPC_MACHO_WRITER_INFO_H -#define PPC_MACHO_WRITER_INFO_H - -#include "llvm/Target/TargetMachOWriterInfo.h" - -namespace llvm { - - // Forward declarations - class MachineRelocation; - class OutputBuffer; - class PPCTargetMachine; - - class PPCMachOWriterInfo : public TargetMachOWriterInfo { - public: - PPCMachOWriterInfo(const PPCTargetMachine &TM); - virtual ~PPCMachOWriterInfo(); - - virtual unsigned GetTargetRelocation(MachineRelocation &MR, - unsigned FromIdx, - unsigned ToAddr, - unsigned ToIdx, - OutputBuffer &RelocOut, - OutputBuffer &SecOut, - bool Scattered, bool Extern) const; - - // Constants for the relocation r_type field. 
- // See <mach-o/ppc/reloc.h> - enum { - PPC_RELOC_VANILLA, // generic relocation - PPC_RELOC_PAIR, // the second relocation entry of a pair - PPC_RELOC_BR14, // 14 bit branch displacement to word address - PPC_RELOC_BR24, // 24 bit branch displacement to word address - PPC_RELOC_HI16, // a PAIR follows with the low 16 bits - PPC_RELOC_LO16, // a PAIR follows with the high 16 bits - PPC_RELOC_HA16, // a PAIR follows, which is sign extended to 32b - PPC_RELOC_LO14 // LO16 with low 2 bits implicitly zero - }; - }; - -} // end llvm namespace - -#endif // PPC_MACHO_WRITER_INFO_H diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 0c3c8eb..0b509ac 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -406,7 +406,7 @@ PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { static bool needsFP(const MachineFunction &MF) { const MachineFrameInfo *MFI = MF.getFrameInfo(); return NoFramePointerElim || MFI->hasVarSizedObjects() || - (PerformTailCallOpt && MF.getInfo<PPCFunctionInfo>()->hasFastCall()); + (GuaranteedTailCallOpt && MF.getInfo<PPCFunctionInfo>()->hasFastCall()); } static bool spillsCR(const MachineFunction &MF) { @@ -427,6 +427,12 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(PPC::R2); // System-reserved register Reserved.set(PPC::R13); // Small Data Area pointer register } + // Reserve R2 on Darwin to hack around the problem of save/restore of CR + // when the stack frame is too big to address directly; we need two regs. + // This is a hack. + if (Subtarget.isDarwinABI()) { + Reserved.set(PPC::R2); + } // On PPC64, r13 is the thread pointer. Never allocate this register. // Note that this is over conservative, as it also prevents allocation of R31 @@ -447,6 +453,12 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { if (Subtarget.isSVR4ABI()) { Reserved.set(PPC::X2); } + // Reserve R2 on Darwin to hack around the problem of save/restore of CR + // when the stack frame is too big to address directly; we need two regs. + // This is a hack. + if (Subtarget.isDarwinABI()) { + Reserved.set(PPC::X2); + } } if (needsFP(MF)) @@ -486,7 +498,7 @@ static bool MustSaveLR(const MachineFunction &MF, unsigned LR) { void PPCRegisterInfo:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { - if (PerformTailCallOpt && I->getOpcode() == PPC::ADJCALLSTACKUP) { + if (GuaranteedTailCallOpt && I->getOpcode() == PPC::ADJCALLSTACKUP) { // Add (actually subtract) back the amount the callee popped on return. if (int CalleeAmt = I->getOperand(1).getImm()) { bool is64Bit = Subtarget.isPPC64(); @@ -724,7 +736,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } // Take into account whether it's an add or mem instruction unsigned OffsetOperandNo = (FIOperandNo == 2) ? 1 : 2; - if (MI.getOpcode() == TargetInstrInfo::INLINEASM) + if (MI.isInlineAsm()) OffsetOperandNo = FIOperandNo-1; // Get the frame index. 
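A minimal sketch of the pattern the two R2/X2 reservation hunks above implement (an illustration only, assuming the PPC register enums and the Subtarget member used in this file; it is not the actual patch and omits the other registers the real override reserves):

  BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
    BitVector Reserved(getNumRegs());
    // Keep R2 (and its 64-bit alias X2) away from the register allocator so
    // the prolog/epilog can use it as a scratch register when a CR save slot
    // is out of range of a single store instruction.
    if (Subtarget.isDarwinABI()) {
      Reserved.set(PPC::R2);
      if (Subtarget.isPPC64())
        Reserved.set(PPC::X2);
    }
    return Reserved;
  }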
@@ -817,7 +829,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // addi 0:rA 1:rB, 2, imm ==> add 0:rA, 1:rB, 2:r0 unsigned OperandBase; - if (OpC != TargetInstrInfo::INLINEASM) { + if (OpC != TargetOpcode::INLINEASM) { assert(ImmToIdxMap.count(OpC) && "No indexed form of load or store available!"); unsigned NewOpcode = ImmToIdxMap.find(OpC)->second; @@ -1050,7 +1062,7 @@ PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Reserve stack space to move the linkage area to in case of a tail call. int TCSPDelta = 0; - if (PerformTailCallOpt && (TCSPDelta = FI->getTailCallSPDelta()) < 0) { + if (GuaranteedTailCallOpt && (TCSPDelta = FI->getTailCallSPDelta()) < 0) { MF.getFrameInfo()->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true, false); } @@ -1160,7 +1172,7 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) // Take into account stack space reserved for tail calls. int TCSPDelta = 0; - if (PerformTailCallOpt && (TCSPDelta = PFI->getTailCallSPDelta()) < 0) { + if (GuaranteedTailCallOpt && (TCSPDelta = PFI->getTailCallSPDelta()) < 0) { LowerBound = TCSPDelta; } @@ -1575,7 +1587,7 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF, // The loaded (or persistent) stack pointer value is offset by the 'stwu' // on entry to the function. Add this offset back now. if (!isPPC64) { - // If this function contained a fastcc call and PerformTailCallOpt is + // If this function contained a fastcc call and GuaranteedTailCallOpt is // enabled (=> hasFastCall()==true) the fastcc call might contain a tail // call which invalidates the stack pointer value in SP(0). So we use the // value of R31 in this case. @@ -1654,7 +1666,7 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF, // Callee pop calling convention. Pop parameter/linkage area. Used for tail // call optimization - if (PerformTailCallOpt && RetOpcode == PPC::BLR && + if (GuaranteedTailCallOpt && RetOpcode == PPC::BLR && MF.getFunction()->getCallingConv() == CallingConv::Fast) { PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); unsigned CallerAllocatedAmt = FI->getMinReservedArea(); diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index f75e781..40914ba 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -130,7 +130,7 @@ bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV, return false; // If symbol visibility is hidden, the extra load is not needed if // the symbol is definitely defined in the current translation unit. 
- bool isDecl = GV->isDeclaration() && !GV->hasNotBeenReadFromBitcode(); + bool isDecl = GV->isDeclaration() && !GV->isMaterializable(); if (GV->hasHiddenVisibility() && !isDecl && !GV->hasCommonLinkage()) return false; return GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() || diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index c7f7882..cac6962 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -45,7 +45,7 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, const std::string &TT, Subtarget(TT, FS, is64Bit), DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this), FrameInfo(*this, is64Bit), JITInfo(*this, is64Bit), TLInfo(*this), - InstrItins(Subtarget.getInstrItineraryData()), MachOWriterInfo(*this) { + InstrItins(Subtarget.getInstrItineraryData()) { if (getRelocationModel() == Reloc::Default) { if (Subtarget.isDarwin()) @@ -91,33 +91,6 @@ bool PPCTargetMachine::addPreEmitPass(PassManagerBase &PM, bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - MachineCodeEmitter &MCE) { - // The JIT should use the static relocation model in ppc32 mode, PIC in ppc64. - // FIXME: This should be moved to TargetJITInfo!! - if (Subtarget.isPPC64()) { - // We use PIC codegen in ppc64 mode, because otherwise we'd have to use many - // instructions to materialize arbitrary global variable + function + - // constant pool addresses. - setRelocationModel(Reloc::PIC_); - // Temporary workaround for the inability of PPC64 JIT to handle jump - // tables. - DisableJumpTables = true; - } else { - setRelocationModel(Reloc::Static); - } - - // Inform the subtarget that we are in JIT mode. FIXME: does this break macho - // writing? - Subtarget.SetJITMode(); - - // Machine code emitter pass for PowerPC. - PM.add(createPPCCodeEmitterPass(*this, MCE)); - - return false; -} - -bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, JITCodeEmitter &JCE) { // The JIT should use the static relocation model in ppc32 mode, PIC in ppc64. // FIXME: This should be moved to TargetJITInfo!! @@ -142,83 +115,3 @@ bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM, return false; } - -bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - ObjectCodeEmitter &OCE) { - // The JIT should use the static relocation model in ppc32 mode, PIC in ppc64. - // FIXME: This should be moved to TargetJITInfo!! - if (Subtarget.isPPC64()) { - // We use PIC codegen in ppc64 mode, because otherwise we'd have to use many - // instructions to materialize arbitrary global variable + function + - // constant pool addresses. - setRelocationModel(Reloc::PIC_); - // Temporary workaround for the inability of PPC64 JIT to handle jump - // tables. - DisableJumpTables = true; - } else { - setRelocationModel(Reloc::Static); - } - - // Inform the subtarget that we are in JIT mode. FIXME: does this break macho - // writing? - Subtarget.SetJITMode(); - - // Machine code emitter pass for PowerPC. - PM.add(createPPCObjectCodeEmitterPass(*this, OCE)); - - return false; -} - -bool PPCTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - MachineCodeEmitter &MCE) { - // Machine code emitter pass for PowerPC. 
- PM.add(createPPCCodeEmitterPass(*this, MCE)); - return false; -} - -bool PPCTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - JITCodeEmitter &JCE) { - // Machine code emitter pass for PowerPC. - PM.add(createPPCJITCodeEmitterPass(*this, JCE)); - return false; -} - -bool PPCTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - ObjectCodeEmitter &OCE) { - // Machine code emitter pass for PowerPC. - PM.add(createPPCObjectCodeEmitterPass(*this, OCE)); - return false; -} - -/// getLSDAEncoding - Returns the LSDA pointer encoding. The choices are 4-byte, -/// 8-byte, and target default. The CIE is hard-coded to indicate that the LSDA -/// pointer in the FDE section is an "sdata4", and should be encoded as a 4-byte -/// pointer by default. However, some systems may require a different size due -/// to bugs or other conditions. We will default to a 4-byte encoding unless the -/// system tells us otherwise. -/// -/// The issue is when the CIE says their is an LSDA. That mandates that every -/// FDE have an LSDA slot. But if the function does not need an LSDA. There -/// needs to be some way to signify there is none. The LSDA is encoded as -/// pc-rel. But you don't look for some magic value after adding the pc. You -/// have to look for a zero before adding the pc. The problem is that the size -/// of the zero to look for depends on the encoding. The unwinder bug in SL is -/// that it always checks for a pointer-size zero. So on x86_64 it looks for 8 -/// bytes of zero. If you have an LSDA, it works fine since the 8-bytes are -/// non-zero so it goes ahead and then reads the value based on the encoding. -/// But if you use sdata4 and there is no LSDA, then the test for zero gives a -/// false negative and the unwinder thinks there is an LSDA. -/// -/// FIXME: This call-back isn't good! We should be using the correct encoding -/// regardless of the system. However, there are some systems which have bugs -/// that prevent this from occuring. -DwarfLSDAEncoding::Encoding PPCTargetMachine::getLSDAEncoding() const { - if (Subtarget.isDarwin() && Subtarget.getDarwinVers() != 10) - return DwarfLSDAEncoding::Default; - - return DwarfLSDAEncoding::EightByte; -} diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h index 4afcb23..ac9ae2b 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.h +++ b/lib/Target/PowerPC/PPCTargetMachine.h @@ -19,7 +19,6 @@ #include "PPCJITInfo.h" #include "PPCInstrInfo.h" #include "PPCISelLowering.h" -#include "PPCMachOWriterInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetData.h" @@ -37,7 +36,6 @@ class PPCTargetMachine : public LLVMTargetMachine { PPCJITInfo JITInfo; PPCTargetLowering TLInfo; InstrItineraryData InstrItins; - PPCMachOWriterInfo MachOWriterInfo; public: PPCTargetMachine(const Target &T, const std::string &TT, @@ -58,40 +56,12 @@ public: virtual const InstrItineraryData getInstrItineraryData() const { return InstrItins; } - virtual const PPCMachOWriterInfo *getMachOWriterInfo() const { - return &MachOWriterInfo; - } - - /// getLSDAEncoding - Returns the LSDA pointer encoding. The choices are - /// 4-byte, 8-byte, and target default. The CIE is hard-coded to indicate that - /// the LSDA pointer in the FDE section is an "sdata4", and should be encoded - /// as a 4-byte pointer by default. However, some systems may require a - /// different size due to bugs or other conditions. 
We will default to a - /// 4-byte encoding unless the system tells us otherwise. - /// - /// FIXME: This call-back isn't good! We should be using the correct encoding - /// regardless of the system. However, there are some systems which have bugs - /// that prevent this from occuring. - virtual DwarfLSDAEncoding::Encoding getLSDAEncoding() const; // Pass Pipeline Configuration virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - MachineCodeEmitter &MCE); - virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, JITCodeEmitter &JCE); - virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - ObjectCodeEmitter &OCE); - virtual bool addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - MachineCodeEmitter &MCE); - virtual bool addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - JITCodeEmitter &JCE); - virtual bool addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - ObjectCodeEmitter &OCE); virtual bool getEnableTailMergeDefault() const; }; diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt index 060d6a5..e49bda0 100644 --- a/lib/Target/PowerPC/README.txt +++ b/lib/Target/PowerPC/README.txt @@ -430,6 +430,35 @@ This theoretically may help improve twolf slightly (used in dimbox.c:142?). ===-------------------------------------------------------------------------=== +PR5945: This: +define i32 @clamp0g(i32 %a) { +entry: + %cmp = icmp slt i32 %a, 0 + %sel = select i1 %cmp, i32 0, i32 %a + ret i32 %sel +} + +Is compiled to this with the PowerPC (32-bit) backend: + +_clamp0g: + cmpwi cr0, r3, 0 + li r2, 0 + blt cr0, LBB1_2 +; BB#1: ; %entry + mr r2, r3 +LBB1_2: ; %entry + mr r3, r2 + blr + +This could be reduced to the much simpler: + +_clamp0g: + srawi r2, r3, 31 + andc r3, r3, r2 + blr + +===-------------------------------------------------------------------------=== + int foo(int N, int ***W, int **TK, int X) { int t, i; @@ -635,6 +664,32 @@ This sort of thing occurs a lot due to globalopt. ===-------------------------------------------------------------------------=== +We compile: + +define i32 @bar(i32 %x) nounwind readnone ssp { +entry: + %0 = icmp eq i32 %x, 0 ; <i1> [#uses=1] + %neg = sext i1 %0 to i32 ; <i32> [#uses=1] + ret i32 %neg +} + +to: + +_bar: + cntlzw r2, r3 + slwi r2, r2, 26 + srawi r3, r2, 31 + blr + +it would be better to produce: + +_bar: + addic r3,r3,-1 + subfe r3,r3,r3 + blr + +===-------------------------------------------------------------------------=== + We currently compile 32-bit bswap: declare i32 @llvm.bswap.i32(i32 %A) @@ -840,3 +895,20 @@ define double @test_FNEG_sel(double %A, double %B, double %C) { ret double %E } +//===----------------------------------------------------------------------===// +The save/restore sequence for CR in prolog/epilog is terrible: +- Each CR subreg is saved individually, rather than doing one save as a unit. +- On Darwin, the save is done after the decrement of SP, which means the offset +from SP of the save slot can be too big for a store instruction, which means we +need an additional register (currently hacked in 96015+96020; the solution there +is correct, but poor). +- On SVR4 the same thing can happen, and I don't think saving before the SP +decrement is safe on that target, as there is no red zone.
This is currently +broken AFAIK, although it's not a target I can exercise. +The following demonstrates the problem: +extern void bar(char *p); +void foo() { + char x[100000]; + bar(x); + __asm__("" ::: "cr2"); +} diff --git a/lib/Target/PowerPC/TargetInfo/Makefile b/lib/Target/PowerPC/TargetInfo/Makefile index 16d0167..a101aa4 100644 --- a/lib/Target/PowerPC/TargetInfo/Makefile +++ b/lib/Target/PowerPC/TargetInfo/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMPowerPCInfo -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/README.txt b/lib/Target/README.txt index 080ea42..4fd46a8 100644 --- a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -156,6 +156,45 @@ void f () { /* this can be optimized to four additions... */ This requires reassociating to forms of expressions that are already available, something that reassoc doesn't think about yet. + +//===---------------------------------------------------------------------===// + +This function: (derived from GCC PR19988) +double foo(double x, double y) { + return ((x + 0.1234 * y) * (x + -0.1234 * y)); +} + +compiles to: +_foo: + movapd %xmm1, %xmm2 + mulsd LCPI1_1(%rip), %xmm1 + mulsd LCPI1_0(%rip), %xmm2 + addsd %xmm0, %xmm1 + addsd %xmm0, %xmm2 + movapd %xmm1, %xmm0 + mulsd %xmm2, %xmm0 + ret + +Reassociate should be able to turn it into: + +double foo(double x, double y) { + return ((x + 0.1234 * y) * (x - 0.1234 * y)); +} + +Which allows the multiply by constant to be CSE'd, producing: + +_foo: + mulsd LCPI1_0(%rip), %xmm1 + movapd %xmm1, %xmm2 + addsd %xmm0, %xmm2 + subsd %xmm1, %xmm0 + mulsd %xmm2, %xmm0 + ret + +This doesn't need -ffast-math support at all. This is particularly bad because +the llvm-gcc frontend is canonicalizing the later into the former, but clang +doesn't have this problem. + //===---------------------------------------------------------------------===// These two functions should generate the same code on big-endian systems: @@ -237,24 +276,6 @@ define void @test(i32* %P) { //===---------------------------------------------------------------------===// -dag/inst combine "clz(x)>>5 -> x==0" for 32-bit x. - -Compile: - -int bar(int x) -{ - int t = __builtin_clz(x); - return -(t>>5); -} - -to: - -_bar: addic r3,r3,-1 - subfe r3,r3,r3 - blr - -//===---------------------------------------------------------------------===// - quantum_sigma_x in 462.libquantum contains the following loop: for(i=0; i<reg->size; i++) @@ -294,6 +315,8 @@ unsigned long reverse(unsigned v) { //===---------------------------------------------------------------------===// +[LOOP RECOGNITION] + These idioms should be recognized as popcount (see PR1488): unsigned countbits_slow(unsigned v) { @@ -356,12 +379,36 @@ this construct. //===---------------------------------------------------------------------===// +[LOOP RECOGNITION] + viterbi speeds up *significantly* if the various "history" related copy loops are turned into memcpy calls at the source level. We need a "loops to memcpy" pass. 
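For illustration, the shape of loop such a pass would have to recognize (a made-up example, not code taken from viterbi):

  void copy_history(short *dst, const short *src, unsigned n) {
    // Element-by-element copy of non-overlapping buffers; a "loops to
    // memcpy" pass could rewrite the whole loop as
    // memcpy(dst, src, n * sizeof(short)).
    for (unsigned i = 0; i != n; ++i)
      dst[i] = src[i];
  }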
//===---------------------------------------------------------------------===// +[LOOP OPTIMIZATION] + +SingleSource/Benchmarks/Misc/dt.c shows several interesting optimization +opportunities in its double_array_divs_variable function: it needs loop +interchange, memory promotion (which LICM already does), vectorization and +variable trip count loop unrolling (since it has a constant trip count). ICC +apparently produces this very nice code with -ffast-math: + +..B1.70: # Preds ..B1.70 ..B1.69 + mulpd %xmm0, %xmm1 #108.2 + mulpd %xmm0, %xmm1 #108.2 + mulpd %xmm0, %xmm1 #108.2 + mulpd %xmm0, %xmm1 #108.2 + addl $8, %edx # + cmpl $131072, %edx #108.2 + jb ..B1.70 # Prob 99% #108.2 + +It would be better to count down to zero, but this is a lot better than what we +do. + +//===---------------------------------------------------------------------===// + Consider: typedef unsigned U32; @@ -1218,9 +1265,16 @@ store->load. //===---------------------------------------------------------------------===// +[ALIAS ANALYSIS] + Type based alias analysis: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=14705 +We should do better analysis of posix_memalign. At the least it should +no-capture its pointer argument, at best, we should know that the out-value +result doesn't point to anything (like malloc). One example of this is in +SingleSource/Benchmarks/Misc/dt.c + //===---------------------------------------------------------------------===// A/B get pinned to the stack because we turn an if/then into a select instead @@ -1697,22 +1751,71 @@ from gcc. Missed instcombine transformation: define i32 @a(i32 %x) nounwind readnone { entry: - %shr = lshr i32 %x, 5 ; <i32> [#uses=1] - %xor = xor i32 %shr, 67108864 ; <i32> [#uses=1] - %sub = add i32 %xor, -67108864 ; <i32> [#uses=1] + %rem = srem i32 %x, 32 + %shl = shl i32 1, %rem + ret i32 %shl +} + +The srem can be transformed to an and because if x is negative, the shift is +undefined. Testcase derived from gcc. + +//===---------------------------------------------------------------------===// + +Missed instcombine/dagcombine transformation: +define i32 @a(i32 %x, i32 %y) nounwind readnone { +entry: + %mul = mul i32 %y, -8 + %sub = sub i32 %x, %mul ret i32 %sub } -This function is equivalent to "ashr i32 %x, 5". Testcase derived from gcc. +Should compile to something like x+y*8, but currently compiles to an +inefficient result. Testcase derived from gcc. //===---------------------------------------------------------------------===// -isSafeToLoadUnconditionally should allow a GEP of a global/alloca with constant -indicies within the bounds of the allocated object. Reduced example: +Missed instcombine/dagcombine transformation: +define void @lshift_lt(i8 zeroext %a) nounwind { +entry: + %conv = zext i8 %a to i32 + %shl = shl i32 %conv, 3 + %cmp = icmp ult i32 %shl, 33 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @bar() nounwind + ret void + +if.end: + ret void +} +declare void @bar() nounwind -const int a[] = {3,6}; -int b(int y) { int* x = y ? &a[0] : &a[1]; return *x; } +The shift should be eliminated. Testcase derived from gcc. -All the loads should be eliminated. Testcase derived from gcc. 
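The reasoning: %conv is a zext from i8, so %conv is at most 255 and the shl by 3 cannot wrap i32; (%conv << 3) ult 33 is therefore equivalent to %conv ult 5. A quick exhaustive check of that equivalence (illustrative only, not part of the patch):

  #include <cassert>

  int main() {
    for (unsigned a = 0; a != 256; ++a)        // every possible i8 input
      assert(((a << 3) < 33u) == (a < 5u));    // shl+icmp vs. simplified icmp
    return 0;
  }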
+//===---------------------------------------------------------------------===// + +These compile into different code, one gets recognized as a switch and the +other doesn't due to phase ordering issues (PR6212): + +int test1(int mainType, int subType) { + if (mainType == 7) + subType = 4; + else if (mainType == 9) + subType = 6; + else if (mainType == 11) + subType = 9; + return subType; +} + +int test2(int mainType, int subType) { + if (mainType == 7) + subType = 4; + if (mainType == 9) + subType = 6; + if (mainType == 11) + subType = 9; + return subType; +} //===---------------------------------------------------------------------===// diff --git a/lib/Target/Sparc/AsmPrinter/Makefile b/lib/Target/Sparc/AsmPrinter/Makefile index 404fad1..a856828 100644 --- a/lib/Target/Sparc/AsmPrinter/Makefile +++ b/lib/Target/Sparc/AsmPrinter/Makefile @@ -8,8 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMSparcAsmPrinter -CXXFLAGS = -fno-rtti - # Hack: we need to include 'main' Sparc target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp index 8fc4e5a..9a2ce6b 100644 --- a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp +++ b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp @@ -16,45 +16,23 @@ #include "Sparc.h" #include "SparcInstrInfo.h" #include "SparcTargetMachine.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Module.h" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/DwarfWriter.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetRegistry.h" -#include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Support/MathExtras.h" -#include <cctype> -#include <cstring> -#include <map> using namespace llvm; -STATISTIC(EmittedInsts, "Number of machine instrs printed"); - namespace { class SparcAsmPrinter : public AsmPrinter { - /// We name each basic block in a Function with a unique number, so - /// that we can consistently refer to them later. This is cleared - /// at the beginning of each call to runOnMachineFunction(). - /// - typedef std::map<const Value *, unsigned> ValueMapTy; - ValueMapTy NumberForBB; - unsigned BBNumber; public: explicit SparcAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, - const MCAsmInfo *T, bool V) - : AsmPrinter(O, TM, T, V), BBNumber(0) {} + MCContext &Ctx, MCStreamer &Streamer, + const MCAsmInfo *T) + : AsmPrinter(O, TM, Ctx, Streamer, T) {} virtual const char *getPassName() const { return "Sparc Assembly Printer"; @@ -65,114 +43,24 @@ namespace { const char *Modifier = 0); void printCCOperand(const MachineInstr *MI, int opNum); + virtual void EmitInstruction(const MachineInstr *MI) { + printInstruction(MI); + OutStreamer.AddBlankLine(); + } void printInstruction(const MachineInstr *MI); // autogenerated. 
static const char *getRegisterName(unsigned RegNo); - bool runOnMachineFunction(MachineFunction &F); bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode); bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode); - void emitFunctionHeader(const MachineFunction &MF); bool printGetPCX(const MachineInstr *MI, unsigned OpNo); }; } // end of anonymous namespace #include "SparcGenAsmWriter.inc" -/// runOnMachineFunction - This uses the printInstruction() -/// method to print assembly for each instruction. -/// -bool SparcAsmPrinter::runOnMachineFunction(MachineFunction &MF) { - this->MF = &MF; - - SetupMachineFunction(MF); - - // Print out constants referenced by the function - EmitConstantPool(MF.getConstantPool()); - - // BBNumber is used here so that a given Printer will never give two - // BBs the same name. (If you have a better way, please let me know!) - - O << "\n\n"; - emitFunctionHeader(MF); - - - // Emit pre-function debug information. - DW->BeginFunction(&MF); - - // Number each basic block so that we can consistently refer to them - // in PC-relative references. - // FIXME: Why not use the MBB numbers? - NumberForBB.clear(); - for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); - I != E; ++I) { - NumberForBB[I->getBasicBlock()] = BBNumber++; - } - - // Print out code for the function. - for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); - I != E; ++I) { - // Print a label for the basic block. - if (I != MF.begin()) { - EmitBasicBlockStart(I); - } - for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); - II != E; ++II) { - // Print the assembly for the instruction. - processDebugLoc(II, true); - printInstruction(II); - - if (VerboseAsm) - EmitComments(*II); - O << '\n'; - processDebugLoc(II, false); - ++EmittedInsts; - } - } - - // Emit post-function debug information. - DW->EndFunction(&MF); - - // We didn't modify anything. - O << "\t.size\t" << *CurrentFnSym << ", .-" << *CurrentFnSym << '\n'; - return false; -} - -void SparcAsmPrinter::emitFunctionHeader(const MachineFunction &MF) { - const Function *F = MF.getFunction(); - OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); - EmitAlignment(MF.getAlignment(), F); - - switch (F->getLinkage()) { - default: llvm_unreachable("Unknown linkage type"); - case Function::PrivateLinkage: - case Function::InternalLinkage: - // Function is internal. 
- break; - case Function::DLLExportLinkage: - case Function::ExternalLinkage: - // Function is externally visible - O << "\t.global\t" << *CurrentFnSym << '\n'; - break; - case Function::LinkerPrivateLinkage: - case Function::LinkOnceAnyLinkage: - case Function::LinkOnceODRLinkage: - case Function::WeakAnyLinkage: - case Function::WeakODRLinkage: - // Function is weak - O << "\t.weak\t" << *CurrentFnSym << '\n'; - break; - } - - printVisibility(CurrentFnSym, F->getVisibility()); - - O << "\t.type\t" << *CurrentFnSym << ", #function\n"; - O << *CurrentFnSym << ":\n"; -} - - void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum) { const MachineOperand &MO = MI->getOperand (opNum); bool CloseParen = false; @@ -193,7 +81,7 @@ void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum) { O << (int)MO.getImm(); break; case MachineOperand::MO_MachineBasicBlock: - O << *GetMBBSymbol(MO.getMBB()->getNumber()); + O << *MO.getMBB()->getSymbol(OutContext); return; case MachineOperand::MO_GlobalAddress: O << *GetGlobalValueSymbol(MO.getGlobal()); @@ -252,7 +140,7 @@ bool SparcAsmPrinter::printGetPCX(const MachineInstr *MI, unsigned opNum) { break; } - unsigned bbNum = NumberForBB[MI->getParent()->getBasicBlock()]; + unsigned bbNum = MI->getParent()->getNumber(); O << '\n' << ".LLGETPCH" << bbNum << ":\n"; O << "\tcall\t.LLGETPC" << bbNum << '\n' ; @@ -312,4 +200,5 @@ bool SparcAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, // Force static initialization. extern "C" void LLVMInitializeSparcAsmPrinter() { RegisterAsmPrinter<SparcAsmPrinter> X(TheSparcTarget); + RegisterAsmPrinter<SparcAsmPrinter> Y(TheSparcV9Target); } diff --git a/lib/Target/Sparc/Makefile b/lib/Target/Sparc/Makefile index d3e2a89..e407848 100644 --- a/lib/Target/Sparc/Makefile +++ b/lib/Target/Sparc/Makefile @@ -10,7 +10,6 @@ LEVEL = ../../.. LIBRARYNAME = LLVMSparcCodeGen TARGET = Sparc -CXXFLAGS = -fno-rtti # Make sure that tblgen is run, first thing. 
BUILT_SOURCES = SparcGenRegisterInfo.h.inc SparcGenRegisterNames.inc \ diff --git a/lib/Target/Sparc/Sparc.h b/lib/Target/Sparc/Sparc.h index bb5155e1..a37920d 100644 --- a/lib/Target/Sparc/Sparc.h +++ b/lib/Target/Sparc/Sparc.h @@ -29,6 +29,7 @@ namespace llvm { FunctionPass *createSparcFPMoverPass(TargetMachine &TM); extern Target TheSparcTarget; + extern Target TheSparcV9Target; } // end namespace llvm; diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index 1b3ca3e..4e93ef0 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -21,7 +21,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/ADT/VectorExtras.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; @@ -134,7 +134,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); SDValue Load; if (ObjectVT == MVT::i32) { - Load = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0); + Load = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0, + false, false, 0); } else { ISD::LoadExtType LoadOp = ISD::SEXTLOAD; @@ -143,7 +144,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, FIPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, FIPtr, DAG.getConstant(Offset, MVT::i32)); Load = DAG.getExtLoad(LoadOp, dl, MVT::i32, Chain, FIPtr, - NULL, 0, ObjectVT); + NULL, 0, ObjectVT, false, false, 0); Load = DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, Load); } InVals.push_back(Load); @@ -167,7 +168,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset, true, false); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); - SDValue Load = DAG.getLoad(MVT::f32, dl, Chain, FIPtr, NULL, 0); + SDValue Load = DAG.getLoad(MVT::f32, dl, Chain, FIPtr, NULL, 0, + false, false, 0); InVals.push_back(Load); } ArgOffset += 4; @@ -189,7 +191,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset, true, false); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); - HiVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0); + HiVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0, + false, false, 0); } SDValue LoVal; @@ -201,7 +204,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset+4, true, false); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); - LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0); + LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0, + false, false, 0); } // Compose the two halves together into an i64 unit. 
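In the source, the code just below this comment (outside the hunk) joins the two word loads; the usual SelectionDAG idiom for that, sketched here on the assumption that HiVal/LoVal are the i32 values loaded above, is:

  // Sparc is big-endian: HiVal holds the high word, LoVal the low word.
  // ISD::BUILD_PAIR takes the low half as its first operand.
  SDValue WholeValue =
      DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, LoVal, HiVal);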
@@ -235,7 +239,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, true, false); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); - OutChains.push_back(DAG.getStore(DAG.getRoot(), dl, Arg, FIPtr, NULL, 0)); + OutChains.push_back(DAG.getStore(DAG.getRoot(), dl, Arg, FIPtr, NULL, 0, + false, false, 0)); ArgOffset += 4; } @@ -252,11 +257,13 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, SDValue SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, + bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) { + // Sparc target does not yet support tail call optimization. + isTailCall = false; #if 0 // Analyze operands of the call, assigning locations to each operand. @@ -337,7 +344,8 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // FIXME: VERIFY THAT 68 IS RIGHT. SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset()+68); PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff); - MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); + MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0, + false, false, 0)); } #else @@ -383,14 +391,17 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // out the parts as integers. Top part goes in a reg. SDValue StackPtr = DAG.CreateStackTemporary(MVT::f64, MVT::i32); SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, - Val, StackPtr, NULL, 0); + Val, StackPtr, NULL, 0, + false, false, 0); // Sparc is big-endian, so the high part comes first. - SDValue Hi = DAG.getLoad(MVT::i32, dl, Store, StackPtr, NULL, 0, 0); + SDValue Hi = DAG.getLoad(MVT::i32, dl, Store, StackPtr, NULL, 0, + false, false, 0); // Increment the pointer to the other half. StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, DAG.getIntPtrConstant(4)); // Load the low part. 
- SDValue Lo = DAG.getLoad(MVT::i32, dl, Store, StackPtr, NULL, 0, 0); + SDValue Lo = DAG.getLoad(MVT::i32, dl, Store, StackPtr, NULL, 0, + false, false, 0); RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Hi)); @@ -433,7 +444,8 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, SDValue PtrOff = DAG.getConstant(ArgOffset, MVT::i32); PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff); MemOpChains.push_back(DAG.getStore(Chain, dl, ValToStore, - PtrOff, NULL, 0)); + PtrOff, NULL, 0, + false, false, 0)); } ArgOffset += ObjSize; } @@ -757,7 +769,7 @@ SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op, SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, GlobalBase, RelAddr); return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - AbsAddr, NULL, 0); + AbsAddr, NULL, 0, false, false, 0); } SDValue SparcTargetLowering::LowerConstantPool(SDValue Op, @@ -778,7 +790,7 @@ SDValue SparcTargetLowering::LowerConstantPool(SDValue Op, SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, GlobalBase, RelAddr); return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - AbsAddr, NULL, 0); + AbsAddr, NULL, 0, false, false, 0); } static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) { @@ -870,7 +882,8 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, DAG.getConstant(TLI.getVarArgsFrameOffset(), MVT::i32)); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); - return DAG.getStore(Op.getOperand(0), dl, Offset, Op.getOperand(1), SV, 0); + return DAG.getStore(Op.getOperand(0), dl, Offset, Op.getOperand(1), SV, 0, + false, false, 0); } static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) { @@ -880,21 +893,23 @@ static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) { SDValue VAListPtr = Node->getOperand(1); const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); DebugLoc dl = Node->getDebugLoc(); - SDValue VAList = DAG.getLoad(MVT::i32, dl, InChain, VAListPtr, SV, 0); + SDValue VAList = DAG.getLoad(MVT::i32, dl, InChain, VAListPtr, SV, 0, + false, false, 0); // Increment the pointer, VAList, to the next vaarg SDValue NextPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, VAList, DAG.getConstant(VT.getSizeInBits()/8, MVT::i32)); // Store the incremented VAList to the legalized pointer InChain = DAG.getStore(VAList.getValue(1), dl, NextPtr, - VAListPtr, SV, 0); + VAListPtr, SV, 0, false, false, 0); // Load the actual argument out of the pointer VAList, unless this is an // f64 load. if (VT != MVT::f64) - return DAG.getLoad(VT, dl, InChain, VAList, NULL, 0); + return DAG.getLoad(VT, dl, InChain, VAList, NULL, 0, false, false, 0); // Otherwise, load it as i64, then do a bitconvert. - SDValue V = DAG.getLoad(MVT::i64, dl, InChain, VAList, NULL, 0); + SDValue V = DAG.getLoad(MVT::i64, dl, InChain, VAList, NULL, 0, + false, false, 0); // Bit-Convert the value to f64. 
SDValue Ops[2] = { diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h index 55781be..2ee73c1 100644 --- a/lib/Target/Sparc/SparcISelLowering.h +++ b/lib/Target/Sparc/SparcISelLowering.h @@ -87,7 +87,7 @@ namespace llvm { virtual SDValue LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, + bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, diff --git a/lib/Target/Sparc/SparcMCAsmInfo.cpp b/lib/Target/Sparc/SparcMCAsmInfo.cpp index b67537c..53a9bde 100644 --- a/lib/Target/Sparc/SparcMCAsmInfo.cpp +++ b/lib/Target/Sparc/SparcMCAsmInfo.cpp @@ -21,7 +21,6 @@ SparcELFMCAsmInfo::SparcELFMCAsmInfo(const Target &T, const StringRef &TT) { Data64bitsDirective = 0; // .xword is only supported by V9. ZeroDirective = "\t.skip\t"; CommentString = "!"; - COMMDirectiveTakesAlignment = true; HasLEB128 = true; AbsoluteDebugSectionOffsets = true; SupportsDebugInformation = true; @@ -30,7 +29,6 @@ SparcELFMCAsmInfo::SparcELFMCAsmInfo(const Target &T, const StringRef &TT) { UsesELFSectionDirectiveForBSS = true; WeakRefDirective = "\t.weak\t"; - SetDirective = "\t.set\t"; PrivateGlobalPrefix = ".L"; } diff --git a/lib/Target/Sparc/SparcSubtarget.cpp b/lib/Target/Sparc/SparcSubtarget.cpp index 8a88cc0..ce11af1 100644 --- a/lib/Target/Sparc/SparcSubtarget.cpp +++ b/lib/Target/Sparc/SparcSubtarget.cpp @@ -15,29 +15,20 @@ #include "SparcGenSubtarget.inc" using namespace llvm; -// FIXME: temporary. -#include "llvm/Support/CommandLine.h" -namespace { - cl::opt<bool> EnableV9("enable-sparc-v9-insts", cl::Hidden, - cl::desc("Enable V9 instructions in the V8 target")); -} - -SparcSubtarget::SparcSubtarget(const std::string &TT, const std::string &FS) { - // Set the default features. - IsV9 = false; - V8DeprecatedInsts = false; - IsVIS = false; +SparcSubtarget::SparcSubtarget(const std::string &TT, const std::string &FS, + bool is64Bit) : + IsV9(false), + V8DeprecatedInsts(false), + IsVIS(false), + Is64Bit(is64Bit) { // Determine default and user specified characteristics - std::string CPU = "generic"; + const char *CPU = "v8"; + if (is64Bit) { + CPU = "v9"; + IsV9 = true; + } - // FIXME: autodetect host here! - CPU = "v9"; // What is a good way to detect V9? - // Parse features string. ParseSubtargetFeatures(FS, CPU); - - // Unless explicitly enabled, disable the V9 instructions. - if (!EnableV9) - IsV9 = false; } diff --git a/lib/Target/Sparc/SparcSubtarget.h b/lib/Target/Sparc/SparcSubtarget.h index 4377034..cec0ab4 100644 --- a/lib/Target/Sparc/SparcSubtarget.h +++ b/lib/Target/Sparc/SparcSubtarget.h @@ -23,8 +23,10 @@ class SparcSubtarget : public TargetSubtarget { bool IsV9; bool V8DeprecatedInsts; bool IsVIS; + bool Is64Bit; + public: - SparcSubtarget(const std::string &TT, const std::string &FS); + SparcSubtarget(const std::string &TT, const std::string &FS, bool is64bit); bool isV9() const { return IsV9; } bool isVIS() const { return IsVIS; } @@ -34,7 +36,17 @@ public: /// subtarget options. Definition of function is auto generated by tblgen. 
std::string ParseSubtargetFeatures(const std::string &FS, const std::string &CPU); - + + bool is64Bit() const { return Is64Bit; } + std::string getDataLayout() const { + const char *p; + if (is64Bit()) { + p = "E-p:64:64:64-i64:64:64-f64:64:64-f128:128:128-n32:64"; + } else { + p = "E-p:32:32:32-i64:64:64-f64:64:64-f128:64:64-n32"; + } + return std::string(p); + } }; } // end namespace llvm diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp index 1eec112..a676623 100644 --- a/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/lib/Target/Sparc/SparcTargetMachine.cpp @@ -19,18 +19,22 @@ using namespace llvm; extern "C" void LLVMInitializeSparcTarget() { // Register the target. - RegisterTargetMachine<SparcTargetMachine> X(TheSparcTarget); - RegisterAsmInfo<SparcELFMCAsmInfo> Y(TheSparcTarget); + RegisterTargetMachine<SparcV8TargetMachine> X(TheSparcTarget); + RegisterTargetMachine<SparcV9TargetMachine> Y(TheSparcV9Target); + + RegisterAsmInfo<SparcELFMCAsmInfo> A(TheSparcTarget); + RegisterAsmInfo<SparcELFMCAsmInfo> B(TheSparcV9Target); } /// SparcTargetMachine ctor - Create an ILP32 architecture model /// SparcTargetMachine::SparcTargetMachine(const Target &T, const std::string &TT, - const std::string &FS) + const std::string &FS, bool is64bit) : LLVMTargetMachine(T, TT), - DataLayout("E-p:32:32-f128:128:128-n32"), - Subtarget(TT, FS), TLInfo(*this), InstrInfo(Subtarget), + Subtarget(TT, FS, is64bit), + DataLayout(Subtarget.getDataLayout()), + TLInfo(*this), InstrInfo(Subtarget), FrameInfo(TargetFrameInfo::StackGrowsDown, 8, 0) { } @@ -49,3 +53,15 @@ bool SparcTargetMachine::addPreEmitPass(PassManagerBase &PM, PM.add(createSparcDelaySlotFillerPass(*this)); return true; } + +SparcV8TargetMachine::SparcV8TargetMachine(const Target &T, + const std::string &TT, + const std::string &FS) + : SparcTargetMachine(T, TT, FS, false) { +} + +SparcV9TargetMachine::SparcV9TargetMachine(const Target &T, + const std::string &TT, + const std::string &FS) + : SparcTargetMachine(T, TT, FS, true) { +} diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h index cce5510..5834d08 100644 --- a/lib/Target/Sparc/SparcTargetMachine.h +++ b/lib/Target/Sparc/SparcTargetMachine.h @@ -24,14 +24,14 @@ namespace llvm { class SparcTargetMachine : public LLVMTargetMachine { - const TargetData DataLayout; // Calculates type size & alignment SparcSubtarget Subtarget; + const TargetData DataLayout; // Calculates type size & alignment SparcTargetLowering TLInfo; SparcInstrInfo InstrInfo; TargetFrameInfo FrameInfo; public: SparcTargetMachine(const Target &T, const std::string &TT, - const std::string &FS); + const std::string &FS, bool is64bit); virtual const SparcInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; } @@ -49,6 +49,22 @@ public: virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); }; +/// SparcV8TargetMachine - Sparc 32-bit target machine +/// +class SparcV8TargetMachine : public SparcTargetMachine { +public: + SparcV8TargetMachine(const Target &T, const std::string &TT, + const std::string &FS); +}; + +/// SparcV9TargetMachine - Sparc 64-bit target machine +/// +class SparcV9TargetMachine : public SparcTargetMachine { +public: + SparcV9TargetMachine(const Target &T, const std::string &TT, + const std::string &FS); +}; + } // end namespace llvm #endif diff --git a/lib/Target/Sparc/TargetInfo/Makefile b/lib/Target/Sparc/TargetInfo/Makefile 
index 0827fdb..641ed87 100644 --- a/lib/Target/Sparc/TargetInfo/Makefile +++ b/lib/Target/Sparc/TargetInfo/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMSparcInfo -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp b/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp index 5d697bd..5c06f07 100644 --- a/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp +++ b/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp @@ -13,7 +13,9 @@ using namespace llvm; Target llvm::TheSparcTarget; +Target llvm::TheSparcV9Target; extern "C" void LLVMInitializeSparcTargetInfo() { RegisterTarget<Triple::sparc> X(TheSparcTarget, "sparc", "Sparc"); + RegisterTarget<Triple::sparcv9> Y(TheSparcV9Target, "sparcv9", "Sparc V9"); } diff --git a/lib/Target/SubtargetFeature.cpp b/lib/Target/SubtargetFeature.cpp index 7cc4fd1..2094cc9 100644 --- a/lib/Target/SubtargetFeature.cpp +++ b/lib/Target/SubtargetFeature.cpp @@ -67,7 +67,7 @@ static void Split(std::vector<std::string> &V, const std::string &S) { while (true) { // Find the next comma size_t Comma = S.find(',', Pos); - // If no comma found then the the rest of the string is used + // If no comma found then the rest of the string is used if (Comma == std::string::npos) { // Add string to vector V.push_back(S.substr(Pos)); diff --git a/lib/Target/SystemZ/AsmPrinter/Makefile b/lib/Target/SystemZ/AsmPrinter/Makefile index 36cd6f8..9a350df 100644 --- a/lib/Target/SystemZ/AsmPrinter/Makefile +++ b/lib/Target/SystemZ/AsmPrinter/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMSystemZAsmPrinter -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' SystemZ target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp index 5c3fe37..7a9e8dd 100644 --- a/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp +++ b/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp @@ -32,19 +32,17 @@ #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetRegistry.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" using namespace llvm; -STATISTIC(EmittedInsts, "Number of machine instrs printed"); - namespace { class SystemZAsmPrinter : public AsmPrinter { public: SystemZAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, - const MCAsmInfo *MAI, bool V) - : AsmPrinter(O, TM, MAI, V) {} + MCContext &Ctx, MCStreamer &Streamer, + const MCAsmInfo *MAI) + : AsmPrinter(O, TM, Ctx, Streamer, MAI) {} virtual const char *getPassName() const { return "SystemZ Assembly Printer"; @@ -67,10 +65,7 @@ namespace { void printInstruction(const MachineInstr *MI); // autogenerated. 
static const char *getRegisterName(unsigned RegNo); - void printMachineInstruction(const MachineInstr * MI); - - void emitFunctionHeader(const MachineFunction &MF); - bool runOnMachineFunction(MachineFunction &F); + void EmitInstruction(const MachineInstr *MI); void getAnalysisUsage(AnalysisUsage &AU) const { AsmPrinter::getAnalysisUsage(AU); @@ -81,82 +76,10 @@ namespace { #include "SystemZGenAsmWriter.inc" -void SystemZAsmPrinter::emitFunctionHeader(const MachineFunction &MF) { - unsigned FnAlign = MF.getAlignment(); - const Function *F = MF.getFunction(); - - OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); - - EmitAlignment(FnAlign, F); - - switch (F->getLinkage()) { - default: assert(0 && "Unknown linkage type!"); - case Function::InternalLinkage: // Symbols default to internal. - case Function::PrivateLinkage: - case Function::LinkerPrivateLinkage: - break; - case Function::ExternalLinkage: - O << "\t.globl\t" << *CurrentFnSym << '\n'; - break; - case Function::LinkOnceAnyLinkage: - case Function::LinkOnceODRLinkage: - case Function::WeakAnyLinkage: - case Function::WeakODRLinkage: - O << "\t.weak\t" << *CurrentFnSym << '\n'; - break; - } - - printVisibility(CurrentFnSym, F->getVisibility()); - - O << "\t.type\t" << *CurrentFnSym << ",@function\n"; - O << *CurrentFnSym << ":\n"; -} - -bool SystemZAsmPrinter::runOnMachineFunction(MachineFunction &MF) { - SetupMachineFunction(MF); - O << "\n\n"; - - // Print out constants referenced by the function - EmitConstantPool(MF.getConstantPool()); - - // Print the 'header' of function - emitFunctionHeader(MF); - - // Print out code for the function. - for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); - I != E; ++I) { - // Print a label for the basic block. - EmitBasicBlockStart(I); - - for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); - II != E; ++II) - // Print the assembly for the instruction. - printMachineInstruction(II); - } - - if (MAI->hasDotTypeDotSizeDirective()) - O << "\t.size\t" << *CurrentFnSym << ", .-" << *CurrentFnSym << '\n'; - - // Print out jump tables referenced by the function. - EmitJumpTableInfo(MF.getJumpTableInfo(), MF); - - // We didn't modify anything - return false; -} - -void SystemZAsmPrinter::printMachineInstruction(const MachineInstr *MI) { - ++EmittedInsts; - - processDebugLoc(MI, true); - +void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) { // Call the autogenerated instruction printer routines. 
printInstruction(MI); - - if (VerboseAsm) - EmitComments(*MI); - O << '\n'; - - processDebugLoc(MI, false); + OutStreamer.AddBlankLine(); } void SystemZAsmPrinter::printPCRelImmOperand(const MachineInstr *MI, int OpNum){ @@ -166,7 +89,7 @@ void SystemZAsmPrinter::printPCRelImmOperand(const MachineInstr *MI, int OpNum){ O << MO.getImm(); return; case MachineOperand::MO_MachineBasicBlock: - O << *GetMBBSymbol(MO.getMBB()->getNumber()); + O << *MO.getMBB()->getSymbol(OutContext); return; case MachineOperand::MO_GlobalAddress: { const GlobalValue *GV = MO.getGlobal(); @@ -221,7 +144,7 @@ void SystemZAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, O << MO.getImm(); return; case MachineOperand::MO_MachineBasicBlock: - O << *GetMBBSymbol(MO.getMBB()->getNumber()); + O << *MO.getMBB()->getSymbol(OutContext); return; case MachineOperand::MO_JumpTableIndex: O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' diff --git a/lib/Target/SystemZ/Makefile b/lib/Target/SystemZ/Makefile index 6d0cbbd..5b44090 100644 --- a/lib/Target/SystemZ/Makefile +++ b/lib/Target/SystemZ/Makefile @@ -10,7 +10,6 @@ LEVEL = ../../.. LIBRARYNAME = LLVMSystemZCodeGen TARGET = SystemZ -CXXFLAGS = -fno-rtti # Make sure that tblgen is run, first thing. BUILT_SOURCES = SystemZGenRegisterInfo.h.inc SystemZGenRegisterNames.inc \ diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index 7096c0e..7f0d9fb 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -594,8 +594,7 @@ bool SystemZDAGToDAGISel::SelectLAAddr(SDNode *Op, SDValue Addr, bool SystemZDAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, SDValue &Base, SDValue &Disp, SDValue &Index) { if (ISD::isNON_EXTLoad(N.getNode()) && - N.hasOneUse() && - IsLegalAndProfitableToFold(N.getNode(), P, P)) + IsLegalToFold(N, P, P)) return SelectAddrRRI20(P, N.getOperand(1), Base, Disp, Index); return false; } @@ -665,10 +664,10 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { Dividend = N0.getNode(); // Insert prepared dividend into suitable 'subreg' - SDNode *Tmp = CurDAG->getMachineNode(TargetInstrInfo::IMPLICIT_DEF, + SDNode *Tmp = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResVT); Dividend = - CurDAG->getMachineNode(TargetInstrInfo::INSERT_SUBREG, dl, ResVT, + CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl, ResVT, SDValue(Tmp, 0), SDValue(Dividend, 0), CurDAG->getTargetConstant(subreg_odd, MVT::i32)); @@ -687,7 +686,7 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { // Copy the division (odd subreg) result, if it is needed. if (!SDValue(Node, 0).use_empty()) { unsigned SubRegIdx = (is32Bit ? subreg_odd32 : subreg_odd); - SDNode *Div = CurDAG->getMachineNode(TargetInstrInfo::EXTRACT_SUBREG, + SDNode *Div = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, NVT, SDValue(Result, 0), CurDAG->getTargetConstant(SubRegIdx, @@ -702,7 +701,7 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { // Copy the remainder (even subreg) result, if it is needed. if (!SDValue(Node, 1).use_empty()) { unsigned SubRegIdx = (is32Bit ? 
subreg_even32 : subreg_even); - SDNode *Rem = CurDAG->getMachineNode(TargetInstrInfo::EXTRACT_SUBREG, + SDNode *Rem = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, NVT, SDValue(Result, 0), CurDAG->getTargetConstant(SubRegIdx, @@ -749,12 +748,12 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { SDNode *Dividend = N0.getNode(); // Insert prepared dividend into suitable 'subreg' - SDNode *Tmp = CurDAG->getMachineNode(TargetInstrInfo::IMPLICIT_DEF, + SDNode *Tmp = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResVT); { unsigned SubRegIdx = (is32Bit ? subreg_odd32 : subreg_odd); Dividend = - CurDAG->getMachineNode(TargetInstrInfo::INSERT_SUBREG, dl, ResVT, + CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl, ResVT, SDValue(Tmp, 0), SDValue(Dividend, 0), CurDAG->getTargetConstant(SubRegIdx, MVT::i32)); } @@ -777,7 +776,7 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { // Copy the division (odd subreg) result, if it is needed. if (!SDValue(Node, 0).use_empty()) { unsigned SubRegIdx = (is32Bit ? subreg_odd32 : subreg_odd); - SDNode *Div = CurDAG->getMachineNode(TargetInstrInfo::EXTRACT_SUBREG, + SDNode *Div = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, NVT, SDValue(Result, 0), CurDAG->getTargetConstant(SubRegIdx, @@ -791,7 +790,7 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { // Copy the remainder (even subreg) result, if it is needed. if (!SDValue(Node, 1).use_empty()) { unsigned SubRegIdx = (is32Bit ? subreg_even32 : subreg_even); - SDNode *Rem = CurDAG->getMachineNode(TargetInstrInfo::EXTRACT_SUBREG, + SDNode *Rem = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, NVT, SDValue(Result, 0), CurDAG->getTargetConstant(SubRegIdx, diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index d6b476e..6f4b30f 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -30,9 +30,9 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -250,11 +250,13 @@ SystemZTargetLowering::LowerFormalArguments(SDValue Chain, SDValue SystemZTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, + bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) { + // SystemZ target does not yet support tail call optimization. 
+ isTailCall = false; switch (CallConv) { default: @@ -335,7 +337,8 @@ SystemZTargetLowering::LowerCCCArguments(SDValue Chain, // from this parameter SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); ArgValue = DAG.getLoad(LocVT, dl, Chain, FIN, - PseudoSourceValue::getFixedStack(FI), 0); + PseudoSourceValue::getFixedStack(FI), 0, + false, false, 0); } // If this is an 8/16/32-bit value, it is really passed promoted to 64 @@ -433,7 +436,8 @@ SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, DAG.getIntPtrConstant(Offset)); MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, - PseudoSourceValue::getStack(), Offset)); + PseudoSourceValue::getStack(), Offset, + false, false, 0)); } } @@ -736,7 +740,7 @@ SDValue SystemZTargetLowering::LowerGlobalAddress(SDValue Op, if (ExtraLoadRequired) Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result, - PseudoSourceValue::getGOT(), 0); + PseudoSourceValue::getGOT(), 0, false, false, 0); // If there was a non-zero offset that we didn't fold, create an explicit // addition for it. diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index 5bf1ed6..36ff994 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -125,7 +125,7 @@ namespace llvm { SmallVectorImpl<SDValue> &InVals); virtual SDValue LowerCall(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, bool isTailCall, + CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, diff --git a/lib/Target/SystemZ/SystemZMCAsmInfo.cpp b/lib/Target/SystemZ/SystemZMCAsmInfo.cpp index ba392bb..1a09206 100644 --- a/lib/Target/SystemZ/SystemZMCAsmInfo.cpp +++ b/lib/Target/SystemZ/SystemZMCAsmInfo.cpp @@ -16,11 +16,8 @@ using namespace llvm; SystemZMCAsmInfo::SystemZMCAsmInfo(const Target &T, const StringRef &TT) { - AlignmentIsInBytes = true; - PrivateGlobalPrefix = ".L"; WeakRefDirective = "\t.weak\t"; - SetDirective = "\t.set\t"; PCSymbol = "."; } diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp index 1318195..fe50c90 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -86,10 +86,11 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MBB.erase(I); } -int SystemZRegisterInfo::getFrameIndexOffset(MachineFunction &MF, int FI) const { +int SystemZRegisterInfo::getFrameIndexOffset(const MachineFunction &MF, + int FI) const { const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - SystemZMachineFunctionInfo *SystemZMFI = + const MachineFrameInfo *MFI = MF.getFrameInfo(); + const SystemZMachineFunctionInfo *SystemZMFI = MF.getInfo<SystemZMachineFunctionInfo>(); int Offset = MFI->getObjectOffset(FI) + MFI->getOffsetAdjustment(); uint64_t StackSize = MFI->getStackSize(); diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h index 93f6aee..fabd4e8 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.h +++ b/lib/Target/SystemZ/SystemZRegisterInfo.h @@ -49,7 +49,7 @@ struct SystemZRegisterInfo : public SystemZGenRegisterInfo { bool hasReservedCallFrame(MachineFunction &MF) const { return true; } bool hasFP(const MachineFunction &MF) const; - int getFrameIndexOffset(MachineFunction &MF, int FI) const; + int 
getFrameIndexOffset(const MachineFunction &MF, int FI) const; void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, diff --git a/lib/Target/SystemZ/TargetInfo/Makefile b/lib/Target/SystemZ/TargetInfo/Makefile index 9f36b2c..0be80eb 100644 --- a/lib/Target/SystemZ/TargetInfo/Makefile +++ b/lib/Target/SystemZ/TargetInfo/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMSystemZInfo -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/TargetAsmLexer.cpp b/lib/Target/TargetAsmLexer.cpp index 0ae6c14..d4893ff 100644 --- a/lib/Target/TargetAsmLexer.cpp +++ b/lib/Target/TargetAsmLexer.cpp @@ -10,5 +10,5 @@ #include "llvm/Target/TargetAsmLexer.h" using namespace llvm; -TargetAsmLexer::TargetAsmLexer(const Target &T) : TheTarget(T) {} +TargetAsmLexer::TargetAsmLexer(const Target &T) : TheTarget(T), Lexer(NULL) {} TargetAsmLexer::~TargetAsmLexer() {} diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp index ba3cc9d..295b30f 100644 --- a/lib/Target/TargetData.cpp +++ b/lib/Target/TargetData.cpp @@ -545,6 +545,13 @@ unsigned char TargetData::getABITypeAlignment(const Type *Ty) const { return getAlignment(Ty, true); } +/// getABIIntegerTypeAlignment - Return the minimum ABI-required alignment for +/// an integer type of the specified bitwidth. +unsigned char TargetData::getABIIntegerTypeAlignment(unsigned BitWidth) const { + return getAlignmentInfo(INTEGER_ALIGN, BitWidth, true, 0); +} + + unsigned char TargetData::getCallFrameTypeAlignment(const Type *Ty) const { for (unsigned i = 0, e = Alignments.size(); i != e; ++i) if (Alignments[i].AlignType == STACK_ALIGN) diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index a231ebc..0c105e9 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -19,17 +19,15 @@ #include "llvm/GlobalVariable.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCSectionMachO.h" -#include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringExtras.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -289,832 +287,54 @@ TargetLoweringObjectFile::getSectionForConstant(SectionKind Kind) const { } /// getSymbolForDwarfGlobalReference - Return an MCExpr to use for a -/// pc-relative reference to the specified global variable from exception -/// handling information. In addition to the symbol, this returns -/// by-reference: -/// -/// IsIndirect - True if the returned symbol is actually a stub that contains -/// the address of the symbol, false if the symbol is the global itself. -/// -/// IsPCRel - True if the symbol reference is already pc-relative, false if -/// the caller needs to subtract off the address of the reference from the -/// symbol. -/// +/// reference to the specified global variable from exception +/// handling information. 
const MCExpr *TargetLoweringObjectFile:: getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, - MachineModuleInfo *MMI, - bool &IsIndirect, bool &IsPCRel) const { - // The generic implementation of this just returns a direct reference to the - // symbol. - IsIndirect = false; - IsPCRel = false; - + MachineModuleInfo *MMI, unsigned Encoding) const { // FIXME: Use GetGlobalValueSymbol. SmallString<128> Name; Mang->getNameWithPrefix(Name, GV, false); - return MCSymbolRefExpr::Create(Name.str(), getContext()); -} - - -//===----------------------------------------------------------------------===// -// ELF -//===----------------------------------------------------------------------===// -typedef StringMap<const MCSectionELF*> ELFUniqueMapTy; - -TargetLoweringObjectFileELF::~TargetLoweringObjectFileELF() { - // If we have the section uniquing map, free it. - delete (ELFUniqueMapTy*)UniquingMap; -} - -const MCSection *TargetLoweringObjectFileELF:: -getELFSection(StringRef Section, unsigned Type, unsigned Flags, - SectionKind Kind, bool IsExplicit) const { - if (UniquingMap == 0) - UniquingMap = new ELFUniqueMapTy(); - ELFUniqueMapTy &Map = *(ELFUniqueMapTy*)UniquingMap; - - // Do the lookup, if we have a hit, return it. - const MCSectionELF *&Entry = Map[Section]; - if (Entry) return Entry; - - return Entry = MCSectionELF::Create(Section, Type, Flags, Kind, IsExplicit, - getContext()); -} - -void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, - const TargetMachine &TM) { - if (UniquingMap != 0) - ((ELFUniqueMapTy*)UniquingMap)->clear(); - TargetLoweringObjectFile::Initialize(Ctx, TM); - - BSSSection = - getELFSection(".bss", MCSectionELF::SHT_NOBITS, - MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, - SectionKind::getBSS()); - - TextSection = - getELFSection(".text", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_EXECINSTR | MCSectionELF::SHF_ALLOC, - SectionKind::getText()); - - DataSection = - getELFSection(".data", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, - SectionKind::getDataRel()); - - ReadOnlySection = - getELFSection(".rodata", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC, - SectionKind::getReadOnly()); - - TLSDataSection = - getELFSection(".tdata", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS | - MCSectionELF::SHF_WRITE, SectionKind::getThreadData()); - - TLSBSSSection = - getELFSection(".tbss", MCSectionELF::SHT_NOBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS | - MCSectionELF::SHF_WRITE, SectionKind::getThreadBSS()); - - DataRelSection = - getELFSection(".data.rel", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getDataRel()); - - DataRelLocalSection = - getELFSection(".data.rel.local", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getDataRelLocal()); - - DataRelROSection = - getELFSection(".data.rel.ro", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getReadOnlyWithRel()); - - DataRelROLocalSection = - getELFSection(".data.rel.ro.local", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getReadOnlyWithRelLocal()); - - MergeableConst4Section = - getELFSection(".rodata.cst4", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE, - SectionKind::getMergeableConst4()); - - MergeableConst8Section = - getELFSection(".rodata.cst8", 
MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE, - SectionKind::getMergeableConst8()); - - MergeableConst16Section = - getELFSection(".rodata.cst16", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE, - SectionKind::getMergeableConst16()); - - StaticCtorSection = - getELFSection(".ctors", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getDataRel()); - - StaticDtorSection = - getELFSection(".dtors", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getDataRel()); - - // Exception Handling Sections. - - // FIXME: We're emitting LSDA info into a readonly section on ELF, even though - // it contains relocatable pointers. In PIC mode, this is probably a big - // runtime hit for C++ apps. Either the contents of the LSDA need to be - // adjusted or this should be a data section. - LSDASection = - getELFSection(".gcc_except_table", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC, SectionKind::getReadOnly()); - EHFrameSection = - getELFSection(".eh_frame", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getDataRel()); - - // Debug Info Sections. - DwarfAbbrevSection = - getELFSection(".debug_abbrev", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfInfoSection = - getELFSection(".debug_info", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfLineSection = - getELFSection(".debug_line", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfFrameSection = - getELFSection(".debug_frame", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfPubNamesSection = - getELFSection(".debug_pubnames", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfPubTypesSection = - getELFSection(".debug_pubtypes", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfStrSection = - getELFSection(".debug_str", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfLocSection = - getELFSection(".debug_loc", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfARangesSection = - getELFSection(".debug_aranges", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfRangesSection = - getELFSection(".debug_ranges", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfMacroInfoSection = - getELFSection(".debug_macinfo", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); -} - - -static SectionKind -getELFKindForNamedSection(StringRef Name, SectionKind K) { - if (Name.empty() || Name[0] != '.') return K; - - // Some lame default implementation based on some magic section names. 
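// [Editorial note -- not part of the patch] The removed helper that follows
// classifies a section purely by well-known name prefixes (.bss*, .tdata*,
// .tbss*, plus their linkonce variants). A self-contained sketch of the same
// prefix-matching idea; the Kind enum and helper names are illustrative:

#include <iostream>
#include <string>

enum class Kind { Unchanged, BSS, ThreadData, ThreadBSS };

static bool startsWith(const std::string &S, const std::string &Prefix) {
  return S.compare(0, Prefix.size(), Prefix) == 0;
}

static Kind classifyByName(const std::string &Name) {
  if (Name == ".bss"   || startsWith(Name, ".bss."))   return Kind::BSS;
  if (Name == ".tdata" || startsWith(Name, ".tdata.")) return Kind::ThreadData;
  if (Name == ".tbss"  || startsWith(Name, ".tbss."))  return Kind::ThreadBSS;
  return Kind::Unchanged;  // Keep whatever kind the IR analysis produced.
}

int main() {
  std::cout << (classifyByName(".tdata.mytls") == Kind::ThreadData) << '\n';
  return 0;
}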
- if (Name == ".bss" || - Name.startswith(".bss.") || - Name.startswith(".gnu.linkonce.b.") || - Name.startswith(".llvm.linkonce.b.") || - Name == ".sbss" || - Name.startswith(".sbss.") || - Name.startswith(".gnu.linkonce.sb.") || - Name.startswith(".llvm.linkonce.sb.")) - return SectionKind::getBSS(); - - if (Name == ".tdata" || - Name.startswith(".tdata.") || - Name.startswith(".gnu.linkonce.td.") || - Name.startswith(".llvm.linkonce.td.")) - return SectionKind::getThreadData(); - - if (Name == ".tbss" || - Name.startswith(".tbss.") || - Name.startswith(".gnu.linkonce.tb.") || - Name.startswith(".llvm.linkonce.tb.")) - return SectionKind::getThreadBSS(); - - return K; -} - - -static unsigned getELFSectionType(StringRef Name, SectionKind K) { - - if (Name == ".init_array") - return MCSectionELF::SHT_INIT_ARRAY; - - if (Name == ".fini_array") - return MCSectionELF::SHT_FINI_ARRAY; + const MCSymbol *Sym = getContext().GetOrCreateSymbol(Name.str()); - if (Name == ".preinit_array") - return MCSectionELF::SHT_PREINIT_ARRAY; - - if (K.isBSS() || K.isThreadBSS()) - return MCSectionELF::SHT_NOBITS; - - return MCSectionELF::SHT_PROGBITS; -} - - -static unsigned -getELFSectionFlags(SectionKind K) { - unsigned Flags = 0; - - if (!K.isMetadata()) - Flags |= MCSectionELF::SHF_ALLOC; - - if (K.isText()) - Flags |= MCSectionELF::SHF_EXECINSTR; - - if (K.isWriteable()) - Flags |= MCSectionELF::SHF_WRITE; - - if (K.isThreadLocal()) - Flags |= MCSectionELF::SHF_TLS; - - // K.isMergeableConst() is left out to honour PR4650 - if (K.isMergeableCString() || K.isMergeableConst4() || - K.isMergeableConst8() || K.isMergeableConst16()) - Flags |= MCSectionELF::SHF_MERGE; - - if (K.isMergeableCString()) - Flags |= MCSectionELF::SHF_STRINGS; - - return Flags; -} - - -const MCSection *TargetLoweringObjectFileELF:: -getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler *Mang, const TargetMachine &TM) const { - StringRef SectionName = GV->getSection(); - - // Infer section flags from the section name if we can. - Kind = getELFKindForNamedSection(SectionName, Kind); - - return getELFSection(SectionName, - getELFSectionType(SectionName, Kind), - getELFSectionFlags(Kind), Kind, true); -} - -static const char *getSectionPrefixForUniqueGlobal(SectionKind Kind) { - if (Kind.isText()) return ".gnu.linkonce.t."; - if (Kind.isReadOnly()) return ".gnu.linkonce.r."; - - if (Kind.isThreadData()) return ".gnu.linkonce.td."; - if (Kind.isThreadBSS()) return ".gnu.linkonce.tb."; - - if (Kind.isDataNoRel()) return ".gnu.linkonce.d."; - if (Kind.isDataRelLocal()) return ".gnu.linkonce.d.rel.local."; - if (Kind.isDataRel()) return ".gnu.linkonce.d.rel."; - if (Kind.isReadOnlyWithRelLocal()) return ".gnu.linkonce.d.rel.ro.local."; - - assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); - return ".gnu.linkonce.d.rel.ro."; + return getSymbolForDwarfReference(Sym, MMI, Encoding); } -const MCSection *TargetLoweringObjectFileELF:: -SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler *Mang, const TargetMachine &TM) const { - - // If this global is linkonce/weak and the target handles this by emitting it - // into a 'uniqued' section name, create and return the section now. 
- if (GV->isWeakForLinker() && !Kind.isCommon() && !Kind.isBSS()) { - const char *Prefix = getSectionPrefixForUniqueGlobal(Kind); - SmallString<128> Name; - Name.append(Prefix, Prefix+strlen(Prefix)); - Mang->getNameWithPrefix(Name, GV, false); - return getELFSection(Name.str(), getELFSectionType(Name.str(), Kind), - getELFSectionFlags(Kind), Kind); - } - - if (Kind.isText()) return TextSection; - - if (Kind.isMergeable1ByteCString() || - Kind.isMergeable2ByteCString() || - Kind.isMergeable4ByteCString()) { - - // We also need alignment here. - // FIXME: this is getting the alignment of the character, not the - // alignment of the global! - unsigned Align = - TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV)); - - const char *SizeSpec = ".rodata.str1."; - if (Kind.isMergeable2ByteCString()) - SizeSpec = ".rodata.str2."; - else if (Kind.isMergeable4ByteCString()) - SizeSpec = ".rodata.str4."; - else - assert(Kind.isMergeable1ByteCString() && "unknown string width"); - - - std::string Name = SizeSpec + utostr(Align); - return getELFSection(Name, MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | - MCSectionELF::SHF_MERGE | - MCSectionELF::SHF_STRINGS, - Kind); - } - - if (Kind.isMergeableConst()) { - if (Kind.isMergeableConst4() && MergeableConst4Section) - return MergeableConst4Section; - if (Kind.isMergeableConst8() && MergeableConst8Section) - return MergeableConst8Section; - if (Kind.isMergeableConst16() && MergeableConst16Section) - return MergeableConst16Section; - return ReadOnlySection; // .const - } - - if (Kind.isReadOnly()) return ReadOnlySection; - - if (Kind.isThreadData()) return TLSDataSection; - if (Kind.isThreadBSS()) return TLSBSSSection; - - // Note: we claim that common symbols are put in BSSSection, but they are - // really emitted with the magic .comm directive, which creates a symbol table - // entry but not a section. - if (Kind.isBSS() || Kind.isCommon()) return BSSSection; - - if (Kind.isDataNoRel()) return DataSection; - if (Kind.isDataRelLocal()) return DataRelLocalSection; - if (Kind.isDataRel()) return DataRelSection; - if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection; - - assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); - return DataRelROSection; -} - -/// getSectionForConstant - Given a mergeable constant with the -/// specified size and relocation information, return a section that it -/// should be placed in. -const MCSection *TargetLoweringObjectFileELF:: -getSectionForConstant(SectionKind Kind) const { - if (Kind.isMergeableConst4() && MergeableConst4Section) - return MergeableConst4Section; - if (Kind.isMergeableConst8() && MergeableConst8Section) - return MergeableConst8Section; - if (Kind.isMergeableConst16() && MergeableConst16Section) - return MergeableConst16Section; - if (Kind.isReadOnly()) - return ReadOnlySection; - - if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection; - assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); - return DataRelROSection; -} - -//===----------------------------------------------------------------------===// -// MachO -//===----------------------------------------------------------------------===// - -typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy; - -TargetLoweringObjectFileMachO::~TargetLoweringObjectFileMachO() { - // If we have the MachO uniquing map, free it. 
- delete (MachOUniqueMapTy*)UniquingMap; -} - - -const MCSectionMachO *TargetLoweringObjectFileMachO:: -getMachOSection(StringRef Segment, StringRef Section, - unsigned TypeAndAttributes, - unsigned Reserved2, SectionKind Kind) const { - // We unique sections by their segment/section pair. The returned section - // may not have the same flags as the requested section, if so this should be - // diagnosed by the client as an error. - - // Create the map if it doesn't already exist. - if (UniquingMap == 0) - UniquingMap = new MachOUniqueMapTy(); - MachOUniqueMapTy &Map = *(MachOUniqueMapTy*)UniquingMap; - - // Form the name to look up. - SmallString<64> Name; - Name += Segment; - Name.push_back(','); - Name += Section; - - // Do the lookup, if we have a hit, return it. - const MCSectionMachO *&Entry = Map[Name.str()]; - if (Entry) return Entry; - - // Otherwise, return a new section. - return Entry = MCSectionMachO::Create(Segment, Section, TypeAndAttributes, - Reserved2, Kind, getContext()); -} - - -void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, - const TargetMachine &TM) { - if (UniquingMap != 0) - ((MachOUniqueMapTy*)UniquingMap)->clear(); - TargetLoweringObjectFile::Initialize(Ctx, TM); - - TextSection // .text - = getMachOSection("__TEXT", "__text", - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, - SectionKind::getText()); - DataSection // .data - = getMachOSection("__DATA", "__data", 0, SectionKind::getDataRel()); - - CStringSection // .cstring - = getMachOSection("__TEXT", "__cstring", MCSectionMachO::S_CSTRING_LITERALS, - SectionKind::getMergeable1ByteCString()); - UStringSection - = getMachOSection("__TEXT","__ustring", 0, - SectionKind::getMergeable2ByteCString()); - FourByteConstantSection // .literal4 - = getMachOSection("__TEXT", "__literal4", MCSectionMachO::S_4BYTE_LITERALS, - SectionKind::getMergeableConst4()); - EightByteConstantSection // .literal8 - = getMachOSection("__TEXT", "__literal8", MCSectionMachO::S_8BYTE_LITERALS, - SectionKind::getMergeableConst8()); - - // ld_classic doesn't support .literal16 in 32-bit mode, and ld64 falls back - // to using it in -static mode. 
- SixteenByteConstantSection = 0; - if (TM.getRelocationModel() != Reloc::Static && - TM.getTargetData()->getPointerSize() == 32) - SixteenByteConstantSection = // .literal16 - getMachOSection("__TEXT", "__literal16",MCSectionMachO::S_16BYTE_LITERALS, - SectionKind::getMergeableConst16()); - - ReadOnlySection // .const - = getMachOSection("__TEXT", "__const", 0, SectionKind::getReadOnly()); - - TextCoalSection - = getMachOSection("__TEXT", "__textcoal_nt", - MCSectionMachO::S_COALESCED | - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, - SectionKind::getText()); - ConstTextCoalSection - = getMachOSection("__TEXT", "__const_coal", MCSectionMachO::S_COALESCED, - SectionKind::getText()); - ConstDataCoalSection - = getMachOSection("__DATA","__const_coal", MCSectionMachO::S_COALESCED, - SectionKind::getText()); - ConstDataSection // .const_data - = getMachOSection("__DATA", "__const", 0, - SectionKind::getReadOnlyWithRel()); - DataCoalSection - = getMachOSection("__DATA","__datacoal_nt", MCSectionMachO::S_COALESCED, - SectionKind::getDataRel()); - DataCommonSection - = getMachOSection("__DATA","__common", MCSectionMachO::S_ZEROFILL, - SectionKind::getBSS()); - DataBSSSection - = getMachOSection("__DATA","__bss", MCSectionMachO::S_ZEROFILL, - SectionKind::getBSS()); - - - LazySymbolPointerSection - = getMachOSection("__DATA", "__la_symbol_ptr", - MCSectionMachO::S_LAZY_SYMBOL_POINTERS, - SectionKind::getMetadata()); - NonLazySymbolPointerSection - = getMachOSection("__DATA", "__nl_symbol_ptr", - MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS, - SectionKind::getMetadata()); - - if (TM.getRelocationModel() == Reloc::Static) { - StaticCtorSection - = getMachOSection("__TEXT", "__constructor", 0,SectionKind::getDataRel()); - StaticDtorSection - = getMachOSection("__TEXT", "__destructor", 0, SectionKind::getDataRel()); - } else { - StaticCtorSection - = getMachOSection("__DATA", "__mod_init_func", - MCSectionMachO::S_MOD_INIT_FUNC_POINTERS, - SectionKind::getDataRel()); - StaticDtorSection - = getMachOSection("__DATA", "__mod_term_func", - MCSectionMachO::S_MOD_TERM_FUNC_POINTERS, - SectionKind::getDataRel()); - } - - // Exception Handling. - LSDASection = getMachOSection("__DATA", "__gcc_except_tab", 0, - SectionKind::getDataRel()); - EHFrameSection = - getMachOSection("__TEXT", "__eh_frame", - MCSectionMachO::S_COALESCED | - MCSectionMachO::S_ATTR_NO_TOC | - MCSectionMachO::S_ATTR_STRIP_STATIC_SYMS | - MCSectionMachO::S_ATTR_LIVE_SUPPORT, - SectionKind::getReadOnly()); - - // Debug Information. 
- DwarfAbbrevSection = - getMachOSection("__DWARF", "__debug_abbrev", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfInfoSection = - getMachOSection("__DWARF", "__debug_info", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfLineSection = - getMachOSection("__DWARF", "__debug_line", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfFrameSection = - getMachOSection("__DWARF", "__debug_frame", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfPubNamesSection = - getMachOSection("__DWARF", "__debug_pubnames", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfPubTypesSection = - getMachOSection("__DWARF", "__debug_pubtypes", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfStrSection = - getMachOSection("__DWARF", "__debug_str", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfLocSection = - getMachOSection("__DWARF", "__debug_loc", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfARangesSection = - getMachOSection("__DWARF", "__debug_aranges", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfRangesSection = - getMachOSection("__DWARF", "__debug_ranges", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfMacroInfoSection = - getMachOSection("__DWARF", "__debug_macinfo", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfDebugInlineSection = - getMachOSection("__DWARF", "__debug_inlined", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); -} - -const MCSection *TargetLoweringObjectFileMachO:: -getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler *Mang, const TargetMachine &TM) const { - // Parse the section specifier and create it if valid. - StringRef Segment, Section; - unsigned TAA, StubSize; - std::string ErrorCode = - MCSectionMachO::ParseSectionSpecifier(GV->getSection(), Segment, Section, - TAA, StubSize); - if (!ErrorCode.empty()) { - // If invalid, report the error with llvm_report_error. - llvm_report_error("Global variable '" + GV->getNameStr() + - "' has an invalid section specifier '" + GV->getSection()+ - "': " + ErrorCode + "."); - // Fall back to dropping it into the data section. - return DataSection; - } - - // Get the section. - const MCSectionMachO *S = - getMachOSection(Segment, Section, TAA, StubSize, Kind); - - // Okay, now that we got the section, verify that the TAA & StubSize agree. - // If the user declared multiple globals with different section flags, we need - // to reject it here. - if (S->getTypeAndAttributes() != TAA || S->getStubSize() != StubSize) { - // If invalid, report the error with llvm_report_error. - llvm_report_error("Global variable '" + GV->getNameStr() + - "' section type or attributes does not match previous" - " section specifier"); - } - - return S; -} - -const MCSection *TargetLoweringObjectFileMachO:: -SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler *Mang, const TargetMachine &TM) const { - assert(!Kind.isThreadLocal() && "Darwin doesn't support TLS"); - - if (Kind.isText()) - return GV->isWeakForLinker() ? TextCoalSection : TextSection; - - // If this is weak/linkonce, put this in a coalescable section, either in text - // or data depending on if it is writable. - if (GV->isWeakForLinker()) { - if (Kind.isReadOnly()) - return ConstTextCoalSection; - return DataCoalSection; - } - - // FIXME: Alignment check should be handled by section classifier. 
- if (Kind.isMergeable1ByteCString() || - Kind.isMergeable2ByteCString()) { - if (TM.getTargetData()->getPreferredAlignment( - cast<GlobalVariable>(GV)) < 32) { - if (Kind.isMergeable1ByteCString()) - return CStringSection; - assert(Kind.isMergeable2ByteCString()); - return UStringSection; - } - } - - if (Kind.isMergeableConst()) { - if (Kind.isMergeableConst4()) - return FourByteConstantSection; - if (Kind.isMergeableConst8()) - return EightByteConstantSection; - if (Kind.isMergeableConst16() && SixteenByteConstantSection) - return SixteenByteConstantSection; - } - - // Otherwise, if it is readonly, but not something we can specially optimize, - // just drop it in .const. - if (Kind.isReadOnly()) - return ReadOnlySection; - - // If this is marked const, put it into a const section. But if the dynamic - // linker needs to write to it, put it in the data segment. - if (Kind.isReadOnlyWithRel()) - return ConstDataSection; - - // Put zero initialized globals with strong external linkage in the - // DATA, __common section with the .zerofill directive. - if (Kind.isBSSExtern()) - return DataCommonSection; - - // Put zero initialized globals with local linkage in __DATA,__bss directive - // with the .zerofill directive (aka .lcomm). - if (Kind.isBSSLocal()) - return DataBSSSection; - - // Otherwise, just drop the variable in the normal data section. - return DataSection; -} - -const MCSection * -TargetLoweringObjectFileMachO::getSectionForConstant(SectionKind Kind) const { - // If this constant requires a relocation, we have to put it in the data - // segment, not in the text segment. - if (Kind.isDataRel() || Kind.isReadOnlyWithRel()) - return ConstDataSection; - - if (Kind.isMergeableConst4()) - return FourByteConstantSection; - if (Kind.isMergeableConst8()) - return EightByteConstantSection; - if (Kind.isMergeableConst16() && SixteenByteConstantSection) - return SixteenByteConstantSection; - return ReadOnlySection; // .const -} - -/// shouldEmitUsedDirectiveFor - This hook allows targets to selectively decide -/// not to emit the UsedDirective for some symbols in llvm.used. -// FIXME: REMOVE this (rdar://7071300) -bool TargetLoweringObjectFileMachO:: -shouldEmitUsedDirectiveFor(const GlobalValue *GV, Mangler *Mang) const { - /// On Darwin, internally linked data beginning with "L" or "l" does not have - /// the directive emitted (this occurs in ObjC metadata). - if (!GV) return false; - - // Check whether the mangled name has the "Private" or "LinkerPrivate" prefix. - if (GV->hasLocalLinkage() && !isa<Function>(GV)) { - // FIXME: ObjC metadata is currently emitted as internal symbols that have - // \1L and \0l prefixes on them. Fix them to be Private/LinkerPrivate and - // this horrible hack can go away. 
- SmallString<64> Name; - Mang->getNameWithPrefix(Name, GV, false); - if (Name[0] == 'L' || Name[0] == 'l') - return false; +const MCExpr *TargetLoweringObjectFile:: +getSymbolForDwarfReference(const MCSymbol *Sym, MachineModuleInfo *MMI, + unsigned Encoding) const { + const MCExpr *Res = MCSymbolRefExpr::Create(Sym, getContext()); + + switch (Encoding & 0xF0) { + default: + llvm_report_error("Do not support this DWARF encoding yet!"); + break; + case dwarf::DW_EH_PE_absptr: + // Do nothing special + break; + case dwarf::DW_EH_PE_pcrel: + // FIXME: PCSymbol + const MCExpr *PC = MCSymbolRefExpr::Create(".", getContext()); + Res = MCBinaryExpr::CreateSub(Res, PC, getContext()); + break; } - return true; + return Res; } -const MCExpr *TargetLoweringObjectFileMachO:: -getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, - MachineModuleInfo *MMI, - bool &IsIndirect, bool &IsPCRel) const { - // The mach-o version of this method defaults to returning a stub reference. - IsIndirect = true; - IsPCRel = false; - - SmallString<128> Name; - Mang->getNameWithPrefix(Name, GV, true); - Name += "$non_lazy_ptr"; - return MCSymbolRefExpr::Create(Name.str(), getContext()); +unsigned TargetLoweringObjectFile::getPersonalityEncoding() const { + return dwarf::DW_EH_PE_absptr; } - -//===----------------------------------------------------------------------===// -// COFF -//===----------------------------------------------------------------------===// - -typedef StringMap<const MCSectionCOFF*> COFFUniqueMapTy; - -TargetLoweringObjectFileCOFF::~TargetLoweringObjectFileCOFF() { - delete (COFFUniqueMapTy*)UniquingMap; +unsigned TargetLoweringObjectFile::getLSDAEncoding() const { + return dwarf::DW_EH_PE_absptr; } - -const MCSection *TargetLoweringObjectFileCOFF:: -getCOFFSection(StringRef Name, bool isDirective, SectionKind Kind) const { - // Create the map if it doesn't already exist. - if (UniquingMap == 0) - UniquingMap = new MachOUniqueMapTy(); - COFFUniqueMapTy &Map = *(COFFUniqueMapTy*)UniquingMap; - - // Do the lookup, if we have a hit, return it. - const MCSectionCOFF *&Entry = Map[Name]; - if (Entry) return Entry; - - return Entry = MCSectionCOFF::Create(Name, isDirective, Kind, getContext()); +unsigned TargetLoweringObjectFile::getFDEEncoding() const { + return dwarf::DW_EH_PE_absptr; } -void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx, - const TargetMachine &TM) { - if (UniquingMap != 0) - ((COFFUniqueMapTy*)UniquingMap)->clear(); - TargetLoweringObjectFile::Initialize(Ctx, TM); - TextSection = getCOFFSection("\t.text", true, SectionKind::getText()); - DataSection = getCOFFSection("\t.data", true, SectionKind::getDataRel()); - StaticCtorSection = - getCOFFSection(".ctors", false, SectionKind::getDataRel()); - StaticDtorSection = - getCOFFSection(".dtors", false, SectionKind::getDataRel()); - - // FIXME: We're emitting LSDA info into a readonly section on COFF, even - // though it contains relocatable pointers. In PIC mode, this is probably a - // big runtime hit for C++ apps. Either the contents of the LSDA need to be - // adjusted or this should be a data section. - LSDASection = - getCOFFSection(".gcc_except_table", false, SectionKind::getReadOnly()); - EHFrameSection = - getCOFFSection(".eh_frame", false, SectionKind::getDataRel()); - - // Debug info. - // FIXME: Don't use 'directive' mode here. 
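// [Editorial note -- not part of the patch] The getSymbolForDwarfReference
// hunk above dispatches on (Encoding & 0xF0): a DWARF exception-handling
// encoding byte packs the value format into the low nibble and the
// application mode (absolute, pc-relative, ...) into the high nibble. A
// self-contained sketch of that split; the constants follow the DWARF EH
// encoding values, everything else is illustrative:

#include <cstdio>

enum : unsigned {
  DW_EH_PE_absptr = 0x00,  // application: absolute pointer
  DW_EH_PE_udata4 = 0x03,  // format: 4-byte unsigned
  DW_EH_PE_pcrel  = 0x10   // application: pc-relative
};

int main() {
  unsigned Encoding = DW_EH_PE_pcrel | DW_EH_PE_udata4;  // 0x13
  unsigned Application = Encoding & 0xF0;  // how the value is applied
  unsigned Format      = Encoding & 0x0F;  // how the value is stored
  std::printf("application=0x%02X format=0x%02X pcrel=%s\n",
              Application, Format,
              Application == DW_EH_PE_pcrel ? "yes" : "no");
  return 0;
}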
- DwarfAbbrevSection = - getCOFFSection("\t.section\t.debug_abbrev,\"dr\"", - true, SectionKind::getMetadata()); - DwarfInfoSection = - getCOFFSection("\t.section\t.debug_info,\"dr\"", - true, SectionKind::getMetadata()); - DwarfLineSection = - getCOFFSection("\t.section\t.debug_line,\"dr\"", - true, SectionKind::getMetadata()); - DwarfFrameSection = - getCOFFSection("\t.section\t.debug_frame,\"dr\"", - true, SectionKind::getMetadata()); - DwarfPubNamesSection = - getCOFFSection("\t.section\t.debug_pubnames,\"dr\"", - true, SectionKind::getMetadata()); - DwarfPubTypesSection = - getCOFFSection("\t.section\t.debug_pubtypes,\"dr\"", - true, SectionKind::getMetadata()); - DwarfStrSection = - getCOFFSection("\t.section\t.debug_str,\"dr\"", - true, SectionKind::getMetadata()); - DwarfLocSection = - getCOFFSection("\t.section\t.debug_loc,\"dr\"", - true, SectionKind::getMetadata()); - DwarfARangesSection = - getCOFFSection("\t.section\t.debug_aranges,\"dr\"", - true, SectionKind::getMetadata()); - DwarfRangesSection = - getCOFFSection("\t.section\t.debug_ranges,\"dr\"", - true, SectionKind::getMetadata()); - DwarfMacroInfoSection = - getCOFFSection("\t.section\t.debug_macinfo,\"dr\"", - true, SectionKind::getMetadata()); -} - -const MCSection *TargetLoweringObjectFileCOFF:: -getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler *Mang, const TargetMachine &TM) const { - return getCOFFSection(GV->getSection(), false, Kind); -} - -static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) { - if (Kind.isText()) - return ".text$linkonce"; - if (Kind.isWriteable()) - return ".data$linkonce"; - return ".rdata$linkonce"; -} - - -const MCSection *TargetLoweringObjectFileCOFF:: -SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler *Mang, const TargetMachine &TM) const { - assert(!Kind.isThreadLocal() && "Doesn't support TLS"); - - // If this global is linkonce/weak and the target handles this by emitting it - // into a 'uniqued' section name, create and return the section now. - if (GV->isWeakForLinker()) { - const char *Prefix = getCOFFSectionPrefixForUniqueGlobal(Kind); - SmallString<128> Name(Prefix, Prefix+strlen(Prefix)); - Mang->getNameWithPrefix(Name, GV, false); - return getCOFFSection(Name.str(), false, Kind); - } - - if (Kind.isText()) - return getTextSection(); - - return getDataSection(); +unsigned TargetLoweringObjectFile::getTTypeEncoding() const { + return dwarf::DW_EH_PE_absptr; } diff --git a/lib/Target/TargetMachOWriterInfo.cpp b/lib/Target/TargetMachOWriterInfo.cpp deleted file mode 100644 index d608119..0000000 --- a/lib/Target/TargetMachOWriterInfo.cpp +++ /dev/null @@ -1,25 +0,0 @@ -//===-- llvm/Target/TargetMachOWriterInfo.h - MachO Writer Info -*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the TargetMachOWriterInfo class. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Target/TargetMachOWriterInfo.h" -#include "llvm/CodeGen/MachineRelocation.h" -using namespace llvm; - -TargetMachOWriterInfo::~TargetMachOWriterInfo() {} - -MachineRelocation -TargetMachOWriterInfo::GetJTRelocation(unsigned Offset, - MachineBasicBlock *MBB) const { - // FIXME: do something about PIC - return MachineRelocation::getBB(Offset, MachineRelocation::VANILLA, MBB); -} diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index fec59b5..88871e3 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -40,7 +40,7 @@ namespace llvm { bool UnwindTablesMandatory; Reloc::Model RelocationModel; CodeModel::Model CMModel; - bool PerformTailCallOpt; + bool GuaranteedTailCallOpt; unsigned StackAlignment; bool RealignStack; bool DisableJumpTables; @@ -173,9 +173,9 @@ DefCodeModel("code-model", "Large code model"), clEnumValEnd)); static cl::opt<bool, true> -EnablePerformTailCallOpt("tailcallopt", - cl::desc("Turn on tail call optimization."), - cl::location(PerformTailCallOpt), +EnableGuaranteedTailCallOpt("tailcallopt", + cl::desc("Turn fastcc calls into tail calls by (potentially) changing ABI."), + cl::location(GuaranteedTailCallOpt), cl::init(false)); static cl::opt<unsigned, true> OverrideStackAlignment("stack-alignment", diff --git a/lib/Target/TargetRegisterInfo.cpp b/lib/Target/TargetRegisterInfo.cpp index fac67e2..52983ff 100644 --- a/lib/Target/TargetRegisterInfo.cpp +++ b/lib/Target/TargetRegisterInfo.cpp @@ -86,9 +86,10 @@ BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF, /// getFrameIndexOffset - Returns the displacement from the frame register to /// the stack frame of the specified index. This is the default implementation /// which is overridden for some targets. -int TargetRegisterInfo::getFrameIndexOffset(MachineFunction &MF, int FI) const { +int TargetRegisterInfo::getFrameIndexOffset(const MachineFunction &MF, + int FI) const { const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo(); - MachineFrameInfo *MFI = MF.getFrameInfo(); + const MachineFrameInfo *MFI = MF.getFrameInfo(); return MFI->getObjectOffset(FI) + MFI->getStackSize() - TFI.getOffsetOfLocalArea() + MFI->getOffsetAdjustment(); } @@ -96,7 +97,7 @@ int TargetRegisterInfo::getFrameIndexOffset(MachineFunction &MF, int FI) const { /// getInitialFrameState - Returns a list of machine moves that are assumed /// on entry to a function. void -TargetRegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves) const { +TargetRegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves) const{ // Default is to do nothing. } diff --git a/lib/Target/X86/AsmParser/Makefile b/lib/Target/X86/AsmParser/Makefile index 288b985..25fb0a2 100644 --- a/lib/Target/X86/AsmParser/Makefile +++ b/lib/Target/X86/AsmParser/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMX86AsmParser -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' x86 target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
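// [Editorial note -- not part of the patch] The TargetMachine.cpp hunk just
// above renames the externally stored flag behind -tailcallopt. It relies on
// the cl::opt<bool, true> idiom, where 'true' selects external storage and
// cl::location points the parser at a plain global that the rest of the
// compiler reads directly. A minimal sketch, assuming LLVM's CommandLine.h
// is available; only the flag wiring is shown:

#include "llvm/Support/CommandLine.h"

namespace llvm { bool GuaranteedTailCallOpt; }

static llvm::cl::opt<bool, true>  // 'true': storage lives elsewhere
EnableGuaranteedTailCallOpt("tailcallopt",
  llvm::cl::desc("Turn fastcc calls into tail calls by (potentially) "
                 "changing ABI."),
  llvm::cl::location(llvm::GuaranteedTailCallOpt),
  llvm::cl::init(false));

int main(int argc, char **argv) {
  llvm::cl::ParseCommandLineOptions(argc, argv);
  // Code elsewhere tests the plain global, never the cl::opt object itself.
  return llvm::GuaranteedTailCallOpt ? 0 : 1;
}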
diff --git a/lib/Target/X86/AsmParser/X86AsmLexer.cpp b/lib/Target/X86/AsmParser/X86AsmLexer.cpp index 1a62044..a58f58e 100644 --- a/lib/Target/X86/AsmParser/X86AsmLexer.cpp +++ b/lib/Target/X86/AsmParser/X86AsmLexer.cpp @@ -7,8 +7,11 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Target/TargetAsmLexer.h" #include "llvm/Target/TargetRegistry.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "X86.h" @@ -19,18 +22,119 @@ namespace { class X86AsmLexer : public TargetAsmLexer { const MCAsmInfo &AsmInfo; + + bool tentativeIsValid; + AsmToken tentativeToken; + + const AsmToken &lexTentative() { + tentativeToken = getLexer()->Lex(); + tentativeIsValid = true; + return tentativeToken; + } + + const AsmToken &lexDefinite() { + if(tentativeIsValid) { + tentativeIsValid = false; + return tentativeToken; + } + else { + return getLexer()->Lex(); + } + } + + AsmToken LexTokenATT(); + AsmToken LexTokenIntel(); protected: - AsmToken LexToken(); + AsmToken LexToken() { + if (!Lexer) { + SetError(SMLoc(), "No MCAsmLexer installed"); + return AsmToken(AsmToken::Error, "", 0); + } + + switch (AsmInfo.getAssemblerDialect()) { + default: + SetError(SMLoc(), "Unhandled dialect"); + return AsmToken(AsmToken::Error, "", 0); + case 0: + return LexTokenATT(); + case 1: + return LexTokenIntel(); + } + } public: X86AsmLexer(const Target &T, const MCAsmInfo &MAI) - : TargetAsmLexer(T), AsmInfo(MAI) { + : TargetAsmLexer(T), AsmInfo(MAI), tentativeIsValid(false) { } }; } -AsmToken X86AsmLexer::LexToken() { - return AsmToken(AsmToken::Error, "", 0); +static unsigned MatchRegisterName(StringRef Name); + +AsmToken X86AsmLexer::LexTokenATT() { + const AsmToken lexedToken = lexDefinite(); + + switch (lexedToken.getKind()) { + default: + return AsmToken(lexedToken); + case AsmToken::Error: + SetError(Lexer->getErrLoc(), Lexer->getErr()); + return AsmToken(lexedToken); + case AsmToken::Percent: + { + const AsmToken &nextToken = lexTentative(); + if (nextToken.getKind() == AsmToken::Identifier) { + unsigned regID = MatchRegisterName(nextToken.getString()); + + if (regID) { + lexDefinite(); + + StringRef regStr(lexedToken.getString().data(), + lexedToken.getString().size() + + nextToken.getString().size()); + + return AsmToken(AsmToken::Register, + regStr, + static_cast<int64_t>(regID)); + } + else { + return AsmToken(lexedToken); + } + } + else { + return AsmToken(lexedToken); + } + } + } +} + +AsmToken X86AsmLexer::LexTokenIntel() { + const AsmToken &lexedToken = lexDefinite(); + + switch(lexedToken.getKind()) { + default: + return AsmToken(lexedToken); + case AsmToken::Error: + SetError(Lexer->getErrLoc(), Lexer->getErr()); + return AsmToken(lexedToken); + case AsmToken::Identifier: + { + std::string upperCase = lexedToken.getString().str(); + std::string lowerCase = LowercaseString(upperCase); + StringRef lowerRef(lowerCase); + + unsigned regID = MatchRegisterName(lowerRef); + + if (regID) { + return AsmToken(AsmToken::Register, + lexedToken.getString(), + static_cast<int64_t>(regID)); + } + else { + return AsmToken(lexedToken); + } + } + } } extern "C" void LLVMInitializeX86AsmLexer() { @@ -38,6 +142,6 @@ extern "C" void LLVMInitializeX86AsmLexer() { RegisterAsmLexer<X86AsmLexer> Y(TheX86_64Target); } -//#define REGISTERS_ONLY -//#include "../X86GenAsmMatcher.inc" -//#undef REGISTERS_ONLY +#define REGISTERS_ONLY 
+#include "X86GenAsmMatcher.inc" +#undef REGISTERS_ONLY diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 7a9218e..84d7bb7 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -10,6 +10,7 @@ #include "llvm/Target/TargetAsmParser.h" #include "X86.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCExpr.h" @@ -67,7 +68,7 @@ public: /// @name Auto-generated Match Functions /// { -static unsigned MatchRegisterName(const StringRef &Name); +static unsigned MatchRegisterName(StringRef Name); /// } @@ -172,8 +173,25 @@ struct X86Operand : public MCParsedAsmOperand { bool isMem() const { return Kind == Memory; } + bool isAbsMem() const { + return Kind == Memory && !getMemSegReg() && !getMemBaseReg() && + !getMemIndexReg() && getMemScale() == 1; + } + + bool isNoSegMem() const { + return Kind == Memory && !getMemSegReg(); + } + bool isReg() const { return Kind == Register; } + void addExpr(MCInst &Inst, const MCExpr *Expr) const { + // Add as immediates when possible. + if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) + Inst.addOperand(MCOperand::CreateImm(CE->getValue())); + else + Inst.addOperand(MCOperand::CreateExpr(Expr)); + } + void addRegOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateReg(getReg())); @@ -181,26 +199,35 @@ struct X86Operand : public MCParsedAsmOperand { void addImmOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateExpr(getImm())); + addExpr(Inst, getImm()); } void addImmSExt8Operands(MCInst &Inst, unsigned N) const { // FIXME: Support user customization of the render method. assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateExpr(getImm())); + addExpr(Inst, getImm()); } void addMemOperands(MCInst &Inst, unsigned N) const { - assert((N == 4 || N == 5) && "Invalid number of operands!"); - + assert((N == 5) && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateReg(getMemBaseReg())); Inst.addOperand(MCOperand::CreateImm(getMemScale())); Inst.addOperand(MCOperand::CreateReg(getMemIndexReg())); + addExpr(Inst, getMemDisp()); + Inst.addOperand(MCOperand::CreateReg(getMemSegReg())); + } + + void addAbsMemOperands(MCInst &Inst, unsigned N) const { + assert((N == 1) && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateExpr(getMemDisp())); + } - // FIXME: What a hack. - if (N == 5) - Inst.addOperand(MCOperand::CreateReg(getMemSegReg())); + void addNoSegMemOperands(MCInst &Inst, unsigned N) const { + assert((N == 4) && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(getMemBaseReg())); + Inst.addOperand(MCOperand::CreateImm(getMemScale())); + Inst.addOperand(MCOperand::CreateReg(getMemIndexReg())); + addExpr(Inst, getMemDisp()); } static X86Operand *CreateToken(StringRef Str, SMLoc Loc) { @@ -222,10 +249,24 @@ struct X86Operand : public MCParsedAsmOperand { return Res; } + /// Create an absolute memory operand. + static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, + SMLoc EndLoc) { + X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc); + Res->Mem.SegReg = 0; + Res->Mem.Disp = Disp; + Res->Mem.BaseReg = 0; + Res->Mem.IndexReg = 0; + Res->Mem.Scale = 1; + return Res; + } + + /// Create a generalized memory operand. 
static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg, unsigned Scale, SMLoc StartLoc, SMLoc EndLoc) { - // We should never just have a displacement, that would be an immediate. + // We should never just have a displacement, that should be parsed as an + // absolute memory operand. assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!"); // The scale should always be one of {1,2,4,8}. @@ -259,6 +300,42 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo, // FIXME: Validate register for the current architecture; we have to do // validation later, so maybe there is no need for this here. RegNo = MatchRegisterName(Tok.getString()); + + // Parse %st(1) and "%st" as "%st(0)" + if (RegNo == 0 && Tok.getString() == "st") { + RegNo = X86::ST0; + EndLoc = Tok.getLoc(); + Parser.Lex(); // Eat 'st' + + // Check to see if we have '(4)' after %st. + if (getLexer().isNot(AsmToken::LParen)) + return false; + // Lex the paren. + getParser().Lex(); + + const AsmToken &IntTok = Parser.getTok(); + if (IntTok.isNot(AsmToken::Integer)) + return Error(IntTok.getLoc(), "expected stack index"); + switch (IntTok.getIntVal()) { + case 0: RegNo = X86::ST0; break; + case 1: RegNo = X86::ST1; break; + case 2: RegNo = X86::ST2; break; + case 3: RegNo = X86::ST3; break; + case 4: RegNo = X86::ST4; break; + case 5: RegNo = X86::ST5; break; + case 6: RegNo = X86::ST6; break; + case 7: RegNo = X86::ST7; break; + default: return Error(IntTok.getLoc(), "invalid stack index"); + } + + if (getParser().Lex().isNot(AsmToken::RParen)) + return Error(Parser.getTok().getLoc(), "expected ')'"); + + EndLoc = Tok.getLoc(); + Parser.Lex(); // Eat ')' + return false; + } + if (RegNo == 0) return Error(Tok.getLoc(), "invalid register name"); @@ -300,8 +377,8 @@ X86Operand *X86ATTAsmParser::ParseMemOperand() { // We have to disambiguate a parenthesized expression "(4+5)" from the start // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The - // only way to do this without lookahead is to eat the ( and see what is after - // it. + // only way to do this without lookahead is to eat the '(' and see what is + // after it. const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext()); if (getLexer().isNot(AsmToken::LParen)) { SMLoc ExprEnd; @@ -312,7 +389,7 @@ X86Operand *X86ATTAsmParser::ParseMemOperand() { if (getLexer().isNot(AsmToken::LParen)) { // Unless we have a segment register, treat this as an immediate. if (SegReg == 0) - return X86Operand::CreateImm(Disp, MemStart, ExprEnd); + return X86Operand::CreateMem(Disp, MemStart, ExprEnd); return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd); } @@ -339,7 +416,7 @@ X86Operand *X86ATTAsmParser::ParseMemOperand() { if (getLexer().isNot(AsmToken::LParen)) { // Unless we have a segment register, treat this as an immediate. if (SegReg == 0) - return X86Operand::CreateImm(Disp, LParenLoc, ExprEnd); + return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd); return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd); } @@ -424,8 +501,20 @@ X86Operand *X86ATTAsmParser::ParseMemOperand() { bool X86ATTAsmParser:: ParseInstruction(const StringRef &Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - - Operands.push_back(X86Operand::CreateToken(Name, NameLoc)); + // FIXME: Hack to recognize "sal..." and "rep..." for now. We need a way to + // represent alternative syntaxes in the .td file, without requiring + // instruction duplication. 
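// [Editorial note -- not part of the patch] The hunk that follows
// canonicalizes alternative mnemonics (sal* -> shl*, repe/repz -> rep,
// repnz -> repne) with llvm::StringSwitch before operand matching. A
// self-contained sketch of the same table-driven aliasing, using std::map
// in place of StringSwitch; names are illustrative:

#include <iostream>
#include <map>
#include <string>

static std::string patchMnemonic(const std::string &Name) {
  static const std::map<std::string, std::string> Aliases = {
    {"sal", "shl"},   {"salb", "shlb"}, {"sall", "shll"},
    {"salq", "shlq"}, {"salw", "shlw"},
    {"repe", "rep"},  {"repz", "rep"},  {"repnz", "repne"}};
  auto It = Aliases.find(Name);
  return It == Aliases.end() ? Name : It->second;  // default: unchanged
}

int main() {
  std::cout << patchMnemonic("salq") << '\n';  // shlq
  std::cout << patchMnemonic("movl") << '\n';  // movl (no alias)
  return 0;
}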
+ StringRef PatchedName = StringSwitch<StringRef>(Name) + .Case("sal", "shl") + .Case("salb", "shlb") + .Case("sall", "shll") + .Case("salq", "shlq") + .Case("salw", "shlw") + .Case("repe", "rep") + .Case("repz", "rep") + .Case("repnz", "repne") + .Default(Name); + Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc)); if (getLexer().isNot(AsmToken::EndOfStatement)) { diff --git a/lib/Target/X86/AsmPrinter/Makefile b/lib/Target/X86/AsmPrinter/Makefile index 326a22f..2368761 100644 --- a/lib/Target/X86/AsmPrinter/Makefile +++ b/lib/Target/X86/AsmPrinter/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMX86AsmPrinter -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' x86 target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp index 804dbb9..1a35a49 100644 --- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp @@ -18,17 +18,22 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" #include "llvm/Support/FormattedStream.h" #include "X86GenInstrNames.inc" using namespace llvm; // Include the auto-generated portion of the assembly writer. #define MachineInstr MCInst -#define NO_ASM_WRITER_BOILERPLATE +#define GET_INSTRUCTION_NAME #include "X86GenAsmWriter.inc" #undef MachineInstr void X86ATTInstPrinter::printInst(const MCInst *MI) { printInstruction(MI); } +StringRef X86ATTInstPrinter::getOpcodeName(unsigned Opcode) const { + return getInstructionName(Opcode); +} + void X86ATTInstPrinter::printSSECC(const MCInst *MI, unsigned Op) { switch (MI->getOperand(Op).getImm()) { @@ -66,6 +71,10 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo) { O << '%' << getRegisterName(Op.getReg()); } else if (Op.isImm()) { O << '$' << Op.getImm(); + + if (CommentStream && (Op.getImm() > 255 || Op.getImm() < -256)) + *CommentStream << format("imm = 0x%X\n", Op.getImm()); + } else { assert(Op.isExpr() && "unknown operand kind in printOperand"); O << '$' << *Op.getExpr(); diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h index 3180618..d109a07 100644 --- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h +++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h @@ -26,11 +26,12 @@ public: virtual void printInst(const MCInst *MI); - + virtual StringRef getOpcodeName(unsigned Opcode) const; + // Autogenerated by tblgen. void printInstruction(const MCInst *MI); static const char *getRegisterName(unsigned RegNo); - + static const char *getInstructionName(unsigned Opcode); void printOperand(const MCInst *MI, unsigned OpNo); void printMemReference(const MCInst *MI, unsigned Op); diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp index 2ffa18f..8cab24c 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp @@ -8,12 +8,10 @@ //===----------------------------------------------------------------------===// // // This file contains a printer that converts from our internal representation -// of machine-dependent LLVM code to AT&T format assembly -// language. This printer is the output mechanism used by `llc'. +// of machine-dependent LLVM code to X86 machine code. 
// //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "asm-printer" #include "X86AsmPrinter.h" #include "X86ATTInstPrinter.h" #include "X86IntelInstPrinter.h" @@ -29,169 +27,70 @@ #include "llvm/Type.h" #include "llvm/Assembly/Writer.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/Mangler.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegistry.h" #include "llvm/ADT/SmallString.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; -STATISTIC(EmittedInsts, "Number of machine instrs printed"); - //===----------------------------------------------------------------------===// // Primitive Helper Functions. //===----------------------------------------------------------------------===// -void X86AsmPrinter::printMCInst(const MCInst *MI) { - if (MAI->getAssemblerDialect() == 0) - X86ATTInstPrinter(O, *MAI).printInstruction(MI); - else - X86IntelInstPrinter(O, *MAI).printInstruction(MI); -} - void X86AsmPrinter::PrintPICBaseSymbol() const { - // FIXME: Gross const cast hack. - X86AsmPrinter *AP = const_cast<X86AsmPrinter*>(this); - O << *X86MCInstLower(OutContext, 0, *AP).GetPICBaseSymbol(); + const TargetLowering *TLI = TM.getTargetLowering(); + O << *static_cast<const X86TargetLowering*>(TLI)->getPICBaseSymbol(MF, + OutContext); } -void X86AsmPrinter::emitFunctionHeader(const MachineFunction &MF) { - unsigned FnAlign = MF.getAlignment(); - const Function *F = MF.getFunction(); +MCSymbol *X86AsmPrinter::GetGlobalValueSymbol(const GlobalValue *GV) const { + SmallString<60> NameStr; + Mang->getNameWithPrefix(NameStr, GV, false); + MCSymbol *Symb = OutContext.GetOrCreateSymbol(NameStr.str()); if (Subtarget->isTargetCygMing()) { - X86COFFMachineModuleInfo &COFFMMI = + X86COFFMachineModuleInfo &COFFMMI = MMI->getObjFileInfo<X86COFFMachineModuleInfo>(); - COFFMMI.DecorateCygMingName(CurrentFnSym, OutContext, F, - *TM.getTargetData()); - } - - OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); - EmitAlignment(FnAlign, F); - - switch (F->getLinkage()) { - default: llvm_unreachable("Unknown linkage type!"); - case Function::InternalLinkage: // Symbols default to internal. 
- case Function::PrivateLinkage: - break; - case Function::DLLExportLinkage: - case Function::ExternalLinkage: - OutStreamer.EmitSymbolAttribute(CurrentFnSym, MCSA_Global); - break; - case Function::LinkerPrivateLinkage: - case Function::LinkOnceAnyLinkage: - case Function::LinkOnceODRLinkage: - case Function::WeakAnyLinkage: - case Function::WeakODRLinkage: - if (Subtarget->isTargetDarwin()) { - OutStreamer.EmitSymbolAttribute(CurrentFnSym, MCSA_Global); - O << MAI->getWeakDefDirective() << *CurrentFnSym << '\n'; - } else if (Subtarget->isTargetCygMing()) { - OutStreamer.EmitSymbolAttribute(CurrentFnSym, MCSA_Global); - O << "\t.linkonce discard\n"; - } else { - O << "\t.weak\t" << *CurrentFnSym << '\n'; - } - break; - } + COFFMMI.DecorateCygMingName(Symb, OutContext, GV, *TM.getTargetData()); - printVisibility(CurrentFnSym, F->getVisibility()); + // Save function name for later type emission. + if (const Function *F = dyn_cast<Function>(GV)) + if (F->isDeclaration()) + COFFMMI.addExternalFunction(Symb->getName()); - if (Subtarget->isTargetELF()) { - O << "\t.type\t" << *CurrentFnSym << ",@function\n"; - } else if (Subtarget->isTargetCygMing()) { - O << "\t.def\t " << *CurrentFnSym; - O << ";\t.scl\t" << - (F->hasInternalLinkage() ? COFF::C_STAT : COFF::C_EXT) - << ";\t.type\t" << (COFF::DT_FCN << COFF::N_BTSHFT) - << ";\t.endef\n"; } - O << *CurrentFnSym << ':'; - if (VerboseAsm) { - O.PadToColumn(MAI->getCommentColumn()); - O << MAI->getCommentString() << ' '; - WriteAsOperand(O, F, /*PrintType=*/false, F->getParent()); - } - O << '\n'; - - // Add some workaround for linkonce linkage on Cygwin\MinGW - if (Subtarget->isTargetCygMing() && - (F->hasLinkOnceLinkage() || F->hasWeakLinkage())) - O << "Lllvm$workaround$fake$stub$" << *CurrentFnSym << ":\n"; + return Symb; } -/// runOnMachineFunction - This uses the printMachineInstruction() -/// method to print assembly for each instruction. +/// runOnMachineFunction - Emit the function body. /// bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) { - const Function *F = MF.getFunction(); - this->MF = &MF; - CallingConv::ID CC = F->getCallingConv(); - SetupMachineFunction(MF); - O << "\n\n"; if (Subtarget->isTargetCOFF()) { - X86COFFMachineModuleInfo &COFFMMI = - MMI->getObjFileInfo<X86COFFMachineModuleInfo>(); - - // Populate function information map. Don't want to populate - // non-stdcall or non-fastcall functions' information right now. - if (CC == CallingConv::X86_StdCall || CC == CallingConv::X86_FastCall) - COFFMMI.AddFunctionInfo(F, *MF.getInfo<X86MachineFunctionInfo>()); - } - - // Print out constants referenced by the function - EmitConstantPool(MF.getConstantPool()); - - // Print the 'header' of function - emitFunctionHeader(MF); - - // Emit pre-function debug and/or EH information. - if (MAI->doesSupportDebugInformation() || MAI->doesSupportExceptionHandling()) - DW->BeginFunction(&MF); - - // Print out code for the function. - bool hasAnyRealCode = false; - for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); - I != E; ++I) { - // Print a label for the basic block. - EmitBasicBlockStart(I); - for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); - II != IE; ++II) { - // Print the assembly for the instruction. - if (!II->isLabel()) - hasAnyRealCode = true; - printMachineInstruction(II); - } - } - - if (Subtarget->isTargetDarwin() && !hasAnyRealCode) { - // If the function is empty, then we need to emit *something*. 
Otherwise, - // the function's label might be associated with something that it wasn't - // meant to be associated with. We emit a noop in this situation. - // We are assuming inline asms are code. - O << "\tnop\n"; + const Function *F = MF.getFunction(); + O << "\t.def\t " << *CurrentFnSym << ";\t.scl\t" << + (F->hasInternalLinkage() ? COFF::C_STAT : COFF::C_EXT) + << ";\t.type\t" << (COFF::DT_FCN << COFF::N_BTSHFT) + << ";\t.endef\n"; } - if (MAI->hasDotTypeDotSizeDirective()) - O << "\t.size\t" << *CurrentFnSym << ", .-" << *CurrentFnSym << '\n'; + // Have common code print out the function header with linkage info etc. + EmitFunctionHeader(); - // Emit post-function debug information. - if (MAI->doesSupportDebugInformation() || MAI->doesSupportExceptionHandling()) - DW->EndFunction(&MF); - - // Print out jump tables referenced by the function. - EmitJumpTableInfo(MF.getJumpTableInfo(), MF); + // Emit the rest of the function body. + EmitFunctionBody(); // We didn't modify anything. return false; @@ -223,12 +122,6 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO) { else GVSym = GetGlobalValueSymbol(GV); - if (Subtarget->isTargetCygMing()) { - X86COFFMachineModuleInfo &COFFMMI = - MMI->getObjFileInfo<X86COFFMachineModuleInfo>(); - COFFMMI.DecorateCygMingName(GVSym, OutContext, GV, *TM.getTargetData()); - } - // Handle dllimport linkage. if (MO.getTargetFlags() == X86II::MO_DLLIMPORT) GVSym = OutContext.GetOrCreateSymbol(Twine("__imp_") + GVSym->getName()); @@ -237,20 +130,20 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO) { MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE) { MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); - const MCSymbol *&StubSym = + MCSymbol *&StubSym = MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(Sym); if (StubSym == 0) StubSym = GetGlobalValueSymbol(GV); } else if (MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE){ MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); - const MCSymbol *&StubSym = + MCSymbol *&StubSym = MMI->getObjFileInfo<MachineModuleInfoMachO>().getHiddenGVStubEntry(Sym); if (StubSym == 0) StubSym = GetGlobalValueSymbol(GV); } else if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) { MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$stub"); - const MCSymbol *&StubSym = + MCSymbol *&StubSym = MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym); if (StubSym == 0) StubSym = GetGlobalValueSymbol(GV); @@ -272,8 +165,8 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO) { TempNameStr += StringRef(MO.getSymbolName()); TempNameStr += StringRef("$stub"); - const MCSymbol *Sym = GetExternalSymbolSymbol(TempNameStr.str()); - const MCSymbol *&StubSym = + MCSymbol *Sym = GetExternalSymbolSymbol(TempNameStr.str()); + MCSymbol *&StubSym = MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym); if (StubSym == 0) { TempNameStr.erase(TempNameStr.end()-5, TempNameStr.end()); @@ -338,7 +231,7 @@ void X86AsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo) { O << MO.getImm(); return; case MachineOperand::MO_MachineBasicBlock: - O << *GetMBBSymbol(MO.getMBB()->getNumber()); + O << *MO.getMBB()->getSymbol(OutContext); return; case MachineOperand::MO_GlobalAddress: case MachineOperand::MO_ExternalSymbol: @@ -451,30 +344,6 @@ void X86AsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op, printLeaMemReference(MI, Op, Modifier); } -void X86AsmPrinter::printPICJumpTableSetLabel(unsigned uid, - const 
MachineBasicBlock *MBB) const { - if (!MAI->getSetDirective()) - return; - - // We don't need .set machinery if we have GOT-style relocations - if (Subtarget->isPICStyleGOT()) - return; - - O << MAI->getSetDirective() << ' ' << MAI->getPrivateGlobalPrefix() - << getFunctionNumber() << '_' << uid << "_set_" << MBB->getNumber() << ','; - - O << *GetMBBSymbol(MBB->getNumber()); - - if (Subtarget->isPICStyleRIPRel()) - O << '-' << *GetJTISymbol(uid) << '\n'; - else { - O << '-'; - PrintPICBaseSymbol(); - O << '\n'; - } -} - - void X86AsmPrinter::printPICLabel(const MachineInstr *MI, unsigned Op) { PrintPICBaseSymbol(); O << '\n'; @@ -482,23 +351,6 @@ void X86AsmPrinter::printPICLabel(const MachineInstr *MI, unsigned Op) { O << ':'; } -void X86AsmPrinter::printPICJumpTableEntry(const MachineJumpTableInfo *MJTI, - const MachineBasicBlock *MBB, - unsigned uid) const { - const char *JTEntryDirective = MJTI->getEntrySize() == 4 ? - MAI->getData32bitsDirective() : MAI->getData64bitsDirective(); - - O << JTEntryDirective << ' '; - - if (Subtarget->isPICStyleRIPRel() || Subtarget->isPICStyleStubPIC()) { - O << MAI->getPrivateGlobalPrefix() << getFunctionNumber() - << '_' << uid << "_set_" << MBB->getNumber(); - } else if (Subtarget->isPICStyleGOT()) - O << *GetMBBSymbol(MBB->getNumber()) << "@GOTOFF"; - else - O << *GetMBBSymbol(MBB->getNumber()); -} - bool X86AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode) { unsigned Reg = MO.getReg(); switch (Mode) { @@ -625,24 +477,6 @@ bool X86AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, } - -/// printMachineInstruction -- Print out a single X86 LLVM instruction MI in -/// AT&T syntax to the current output stream. -/// -void X86AsmPrinter::printMachineInstruction(const MachineInstr *MI) { - ++EmittedInsts; - - processDebugLoc(MI, true); - - printInstructionThroughMCStreamer(MI); - - if (VerboseAsm) - EmitComments(*MI); - O << '\n'; - - processDebugLoc(MI, false); -} - void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { if (Subtarget->isTargetDarwin()) { // All darwin targets use mach-o. @@ -666,14 +500,17 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { OutStreamer.SwitchSection(TheSection); for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { - O << *Stubs[i].first << ":\n"; - // Get the MCSymbol without the $stub suffix. - O << "\t.indirect_symbol " << *Stubs[i].second; - O << "\n\thlt ; hlt ; hlt ; hlt ; hlt\n"; + // L_foo$stub: + OutStreamer.EmitLabel(Stubs[i].first); + // .indirect_symbol _foo + OutStreamer.EmitSymbolAttribute(Stubs[i].second, MCSA_IndirectSymbol); + // hlt; hlt; hlt; hlt; hlt hlt = 0xf4 = -12. + const char HltInsts[] = { -12, -12, -12, -12, -12 }; + OutStreamer.EmitBytes(StringRef(HltInsts, 5), 0/*addrspace*/); } - O << '\n'; Stubs.clear(); + OutStreamer.AddBlankLine(); } // Output stubs for external and common global variables. 
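A note on the $stub hunk just above, since the inline comment is terse: 0xF4 is the one-byte encoding of the x86 'hlt' instruction, and -12 is that same byte value spelled as a signed char, which is why the initializer looks odd. A minimal standalone check (illustrative only, not part of the patch):

#include <cstdio>

int main() {
  // Same initializer as the patch: five 'hlt' opcodes, each written as the
  // signed-char value of the byte 0xF4 (-12 in two's complement).
  const char HltInsts[] = { -12, -12, -12, -12, -12 };
  for (unsigned i = 0; i != sizeof(HltInsts); ++i)
    printf("%02x ", (unsigned char)HltInsts[i]);  // prints: f4 f4 f4 f4 f4
  printf("\n");
  return 0;
}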
@@ -686,10 +523,15 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { OutStreamer.SwitchSection(TheSection); for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { - O << *Stubs[i].first << ":\n\t.indirect_symbol " << *Stubs[i].second; - O << "\n\t.long\t0\n"; + // L_foo$non_lazy_ptr: + OutStreamer.EmitLabel(Stubs[i].first); + // .indirect_symbol _foo + OutStreamer.EmitSymbolAttribute(Stubs[i].second, MCSA_IndirectSymbol); + // .long 0 + OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/); } Stubs.clear(); + OutStreamer.AddBlankLine(); } Stubs = MMIMacho.GetHiddenGVStubList(); @@ -698,10 +540,15 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { EmitAlignment(2); for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { - O << *Stubs[i].first << ":\n" << MAI->getData32bitsDirective(); - O << *Stubs[i].second << '\n'; + // L_foo$non_lazy_ptr: + OutStreamer.EmitLabel(Stubs[i].first); + // .long _foo + OutStreamer.EmitValue(MCSymbolRefExpr::Create(Stubs[i].second, + OutContext), + 4/*size*/, 0/*addrspace*/); } Stubs.clear(); + OutStreamer.AddBlankLine(); } // Funny Darwin hack: This flag tells the linker that no global symbols @@ -735,7 +582,6 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) if (I->hasDLLExportLinkage()) { MCSymbol *Sym = GetGlobalValueSymbol(I); - COFFMMI.DecorateCygMingName(Sym, OutContext, I, *TM.getTargetData()); DLLExportedFns.push_back(Sym); } @@ -757,6 +603,28 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { } } } + + if (Subtarget->isTargetELF()) { + TargetLoweringObjectFileELF &TLOFELF = + static_cast<TargetLoweringObjectFileELF &>(getObjFileLowering()); + + MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>(); + + // Output stubs for external and common global variables. + MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); + if (!Stubs.empty()) { + OutStreamer.SwitchSection(TLOFELF.getDataRelSection()); + const TargetData *TD = TM.getTargetData(); + + for (unsigned i = 0, e = Stubs.size(); i != e; ++i) + O << *Stubs[i].first << ":\n" + << (TD->getPointerSize() == 8 ? + MAI->getData64bitsDirective() : MAI->getData32bitsDirective()) + << *Stubs[i].second << '\n'; + + Stubs.clear(); + } + } } diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h index 6a9262d..039214a 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h @@ -36,8 +36,9 @@ class VISIBILITY_HIDDEN X86AsmPrinter : public AsmPrinter { const X86Subtarget *Subtarget; public: explicit X86AsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, - const MCAsmInfo *T, bool V) - : AsmPrinter(O, TM, T, V) { + MCContext &Ctx, MCStreamer &Streamer, + const MCAsmInfo *T) + : AsmPrinter(O, TM, Ctx, Streamer, T) { Subtarget = &TM.getSubtarget<X86Subtarget>(); } @@ -57,14 +58,10 @@ class VISIBILITY_HIDDEN X86AsmPrinter : public AsmPrinter { virtual void EmitEndOfAsmFile(Module &M); - void printInstructionThroughMCStreamer(const MachineInstr *MI); - - - void printMCInst(const MCInst *MI); - - void printSymbolOperand(const MachineOperand &MO); - + virtual void EmitInstruction(const MachineInstr *MI); + void printSymbolOperand(const MachineOperand &MO); + virtual MCSymbol *GetGlobalValueSymbol(const GlobalValue *GV) const; // These methods are used by the tablegen'erated instruction printer. 
void printOperand(const MachineInstr *MI, unsigned OpNo, @@ -124,24 +121,12 @@ class VISIBILITY_HIDDEN X86AsmPrinter : public AsmPrinter { const char *Modifier=NULL); void printLeaMemReference(const MachineInstr *MI, unsigned Op, const char *Modifier=NULL); - void printPICJumpTableSetLabel(unsigned uid, - const MachineBasicBlock *MBB) const; - void printPICJumpTableSetLabel(unsigned uid, unsigned uid2, - const MachineBasicBlock *MBB) const { - AsmPrinter::printPICJumpTableSetLabel(uid, uid2, MBB); - } - void printPICJumpTableEntry(const MachineJumpTableInfo *MJTI, - const MachineBasicBlock *MBB, - unsigned uid) const; void printPICLabel(const MachineInstr *MI, unsigned Op); void PrintPICBaseSymbol() const; bool runOnMachineFunction(MachineFunction &F); - - void emitFunctionHeader(const MachineFunction &MF); - }; } // end namespace llvm diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp index 4efb529..610beb5 100644 --- a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp @@ -24,11 +24,14 @@ using namespace llvm; // Include the auto-generated portion of the assembly writer. #define MachineInstr MCInst -#define NO_ASM_WRITER_BOILERPLATE +#define GET_INSTRUCTION_NAME #include "X86GenAsmWriter1.inc" #undef MachineInstr void X86IntelInstPrinter::printInst(const MCInst *MI) { printInstruction(MI); } +StringRef X86IntelInstPrinter::getOpcodeName(unsigned Opcode) const { + return getInstructionName(Opcode); +} void X86IntelInstPrinter::printSSECC(const MCInst *MI, unsigned Op) { switch (MI->getOperand(Op).getImm()) { diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h index 1976177..545bf84 100644 --- a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h +++ b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h @@ -26,10 +26,12 @@ public: : MCInstPrinter(O, MAI) {} virtual void printInst(const MCInst *MI); + virtual StringRef getOpcodeName(unsigned Opcode) const; // Autogenerated by tblgen. void printInstruction(const MCInst *MI); static const char *getRegisterName(unsigned RegNo); + static const char *getInstructionName(unsigned Opcode); void printOperand(const MCInst *MI, unsigned OpNo, diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp index b970d46..fa8d13d 100644 --- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp +++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp @@ -14,8 +14,9 @@ #include "X86MCInstLower.h" #include "X86AsmPrinter.h" -#include "X86MCAsmInfo.h" #include "X86COFFMachineModuleInfo.h" +#include "X86MCAsmInfo.h" +#include "X86MCTargetExpr.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/MC/MCContext.h" @@ -25,6 +26,7 @@ #include "llvm/Target/Mangler.h" #include "llvm/Support/FormattedStream.h" #include "llvm/ADT/SmallString.h" +#include "llvm/Type.h" using namespace llvm; @@ -39,37 +41,45 @@ MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const { MCSymbol *X86MCInstLower::GetPICBaseSymbol() const { - return Ctx.GetOrCreateSymbol(Twine(AsmPrinter.MAI->getPrivateGlobalPrefix())+ - Twine(AsmPrinter.getFunctionNumber())+"$pb"); + const TargetLowering *TLI = AsmPrinter.TM.getTargetLowering(); + return static_cast<const X86TargetLowering*>(TLI)-> + getPICBaseSymbol(AsmPrinter.MF, Ctx); } -/// LowerGlobalAddressOperand - Lower an MO_GlobalAddress operand to an -/// MCOperand. 
+/// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol +/// operand to an MCSymbol. MCSymbol *X86MCInstLower:: -GetGlobalAddressSymbol(const MachineOperand &MO) const { - const GlobalValue *GV = MO.getGlobal(); - - bool isImplicitlyPrivate = false; - if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB || - MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY || - MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE || - MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE) - isImplicitlyPrivate = true; - +GetSymbolFromOperand(const MachineOperand &MO) const { + assert((MO.isGlobal() || MO.isSymbol()) && "Isn't a symbol reference"); + SmallString<128> Name; - Mang->getNameWithPrefix(Name, GV, isImplicitlyPrivate); - if (getSubtarget().isTargetCygMing()) { - X86COFFMachineModuleInfo &COFFMMI = - AsmPrinter.MMI->getObjFileInfo<X86COFFMachineModuleInfo>(); - COFFMMI.DecorateCygMingName(Name, GV, *AsmPrinter.TM.getTargetData()); - } + if (MO.isGlobal()) { + bool isImplicitlyPrivate = false; + if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB || + MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY || + MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE || + MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE) + isImplicitlyPrivate = true; + + const GlobalValue *GV = MO.getGlobal(); + Mang->getNameWithPrefix(Name, GV, isImplicitlyPrivate); + if (getSubtarget().isTargetCygMing()) { + X86COFFMachineModuleInfo &COFFMMI = + AsmPrinter.MMI->getObjFileInfo<X86COFFMachineModuleInfo>(); + COFFMMI.DecorateCygMingName(Name, GV, *AsmPrinter.TM.getTargetData()); + } + } else { + assert(MO.isSymbol()); + Name += AsmPrinter.MAI->getGlobalPrefix(); + Name += MO.getSymbolName(); + } + + // If the target flags on the operand changes the name of the symbol, do that + // before we return the symbol. switch (MO.getTargetFlags()) { - default: llvm_unreachable("Unknown target flag on GV operand"); - case X86II::MO_NO_FLAG: // No flag. - case X86II::MO_PIC_BASE_OFFSET: // Doesn't modify symbol name. - break; + default: break; case X86II::MO_DLLIMPORT: { // Handle dllimport linkage. const char *Prefix = "__imp_"; @@ -81,190 +91,72 @@ GetGlobalAddressSymbol(const MachineOperand &MO) const { Name += "$non_lazy_ptr"; MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str()); - const MCSymbol *&StubSym = getMachOMMI().getGVStubEntry(Sym); - if (StubSym == 0) - StubSym = AsmPrinter.GetGlobalValueSymbol(GV); + MCSymbol *&StubSym = getMachOMMI().getGVStubEntry(Sym); + if (StubSym == 0) { + assert(MO.isGlobal() && "Extern symbol not handled yet"); + StubSym = AsmPrinter.GetGlobalValueSymbol(MO.getGlobal()); + } return Sym; } case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: { Name += "$non_lazy_ptr"; MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str()); - const MCSymbol *&StubSym = getMachOMMI().getHiddenGVStubEntry(Sym); - if (StubSym == 0) - StubSym = AsmPrinter.GetGlobalValueSymbol(GV); + MCSymbol *&StubSym = getMachOMMI().getHiddenGVStubEntry(Sym); + if (StubSym == 0) { + assert(MO.isGlobal() && "Extern symbol not handled yet"); + StubSym = AsmPrinter.GetGlobalValueSymbol(MO.getGlobal()); + } return Sym; } case X86II::MO_DARWIN_STUB: { Name += "$stub"; MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str()); - const MCSymbol *&StubSym = getMachOMMI().getFnStubEntry(Sym); - if (StubSym == 0) - StubSym = AsmPrinter.GetGlobalValueSymbol(GV); - return Sym; - } - // FIXME: These probably should be a modifier on the symbol or something?? 
- case X86II::MO_TLSGD: Name += "@TLSGD"; break; - case X86II::MO_GOTTPOFF: Name += "@GOTTPOFF"; break; - case X86II::MO_INDNTPOFF: Name += "@INDNTPOFF"; break; - case X86II::MO_TPOFF: Name += "@TPOFF"; break; - case X86II::MO_NTPOFF: Name += "@NTPOFF"; break; - case X86II::MO_GOTPCREL: Name += "@GOTPCREL"; break; - case X86II::MO_GOT: Name += "@GOT"; break; - case X86II::MO_GOTOFF: Name += "@GOTOFF"; break; - case X86II::MO_PLT: Name += "@PLT"; break; - } - - return Ctx.GetOrCreateSymbol(Name.str()); -} - -MCSymbol *X86MCInstLower:: -GetExternalSymbolSymbol(const MachineOperand &MO) const { - SmallString<128> Name; - Name += AsmPrinter.MAI->getGlobalPrefix(); - Name += MO.getSymbolName(); - - switch (MO.getTargetFlags()) { - default: llvm_unreachable("Unknown target flag on GV operand"); - case X86II::MO_NO_FLAG: // No flag. - case X86II::MO_GOT_ABSOLUTE_ADDRESS: // Doesn't modify symbol name. - case X86II::MO_PIC_BASE_OFFSET: // Doesn't modify symbol name. - break; - case X86II::MO_DLLIMPORT: { - // Handle dllimport linkage. - const char *Prefix = "__imp_"; - Name.insert(Name.begin(), Prefix, Prefix+strlen(Prefix)); - break; - } - case X86II::MO_DARWIN_STUB: { - Name += "$stub"; - MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str()); - const MCSymbol *&StubSym = getMachOMMI().getFnStubEntry(Sym); - - if (StubSym == 0) { + MCSymbol *&StubSym = getMachOMMI().getFnStubEntry(Sym); + if (StubSym) + return Sym; + + if (MO.isGlobal()) { + StubSym = AsmPrinter.GetGlobalValueSymbol(MO.getGlobal()); + } else { Name.erase(Name.end()-5, Name.end()); StubSym = Ctx.GetOrCreateSymbol(Name.str()); } return Sym; } - // FIXME: These probably should be a modifier on the symbol or something?? - case X86II::MO_TLSGD: Name += "@TLSGD"; break; - case X86II::MO_GOTTPOFF: Name += "@GOTTPOFF"; break; - case X86II::MO_INDNTPOFF: Name += "@INDNTPOFF"; break; - case X86II::MO_TPOFF: Name += "@TPOFF"; break; - case X86II::MO_NTPOFF: Name += "@NTPOFF"; break; - case X86II::MO_GOTPCREL: Name += "@GOTPCREL"; break; - case X86II::MO_GOT: Name += "@GOT"; break; - case X86II::MO_GOTOFF: Name += "@GOTOFF"; break; - case X86II::MO_PLT: Name += "@PLT"; break; - } - - return Ctx.GetOrCreateSymbol(Name.str()); -} - -MCSymbol *X86MCInstLower::GetJumpTableSymbol(const MachineOperand &MO) const { - SmallString<256> Name; - // FIXME: Use AsmPrinter.GetJTISymbol. @TLSGD shouldn't be part of the symbol - // name! - raw_svector_ostream(Name) << AsmPrinter.MAI->getPrivateGlobalPrefix() << "JTI" - << AsmPrinter.getFunctionNumber() << '_' << MO.getIndex(); - - switch (MO.getTargetFlags()) { - default: - llvm_unreachable("Unknown target flag on GV operand"); - case X86II::MO_NO_FLAG: // No flag. - case X86II::MO_PIC_BASE_OFFSET: - case X86II::MO_DARWIN_NONLAZY_PIC_BASE: - case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: - break; - // FIXME: These probably should be a modifier on the symbol or something?? - case X86II::MO_TLSGD: Name += "@TLSGD"; break; - case X86II::MO_GOTTPOFF: Name += "@GOTTPOFF"; break; - case X86II::MO_INDNTPOFF: Name += "@INDNTPOFF"; break; - case X86II::MO_TPOFF: Name += "@TPOFF"; break; - case X86II::MO_NTPOFF: Name += "@NTPOFF"; break; - case X86II::MO_GOTPCREL: Name += "@GOTPCREL"; break; - case X86II::MO_GOT: Name += "@GOT"; break; - case X86II::MO_GOTOFF: Name += "@GOTOFF"; break; - case X86II::MO_PLT: Name += "@PLT"; break; } - - // Create a symbol for the name. 
- return Ctx.GetOrCreateSymbol(Name.str()); -} - -MCSymbol *X86MCInstLower:: -GetConstantPoolIndexSymbol(const MachineOperand &MO) const { - SmallString<256> Name; - // FIXME: USe AsmPrinter.GetCPISymbol. @TLSGD shouldn't be part of the symbol - // name! - raw_svector_ostream(Name) << AsmPrinter.MAI->getPrivateGlobalPrefix() << "CPI" - << AsmPrinter.getFunctionNumber() << '_' << MO.getIndex(); - - switch (MO.getTargetFlags()) { - default: - llvm_unreachable("Unknown target flag on GV operand"); - case X86II::MO_NO_FLAG: // No flag. - case X86II::MO_PIC_BASE_OFFSET: - case X86II::MO_DARWIN_NONLAZY_PIC_BASE: - case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: - break; - // FIXME: These probably should be a modifier on the symbol or something?? - case X86II::MO_TLSGD: Name += "@TLSGD"; break; - case X86II::MO_GOTTPOFF: Name += "@GOTTPOFF"; break; - case X86II::MO_INDNTPOFF: Name += "@INDNTPOFF"; break; - case X86II::MO_TPOFF: Name += "@TPOFF"; break; - case X86II::MO_NTPOFF: Name += "@NTPOFF"; break; - case X86II::MO_GOTPCREL: Name += "@GOTPCREL"; break; - case X86II::MO_GOT: Name += "@GOT"; break; - case X86II::MO_GOTOFF: Name += "@GOTOFF"; break; - case X86II::MO_PLT: Name += "@PLT"; break; - } - - // Create a symbol for the name. return Ctx.GetOrCreateSymbol(Name.str()); } -MCSymbol *X86MCInstLower:: -GetBlockAddressSymbol(const MachineOperand &MO) const { - const char *Suffix = ""; - switch (MO.getTargetFlags()) { - default: llvm_unreachable("Unknown target flag on BA operand"); - case X86II::MO_NO_FLAG: break; // No flag. - case X86II::MO_PIC_BASE_OFFSET: break; // Doesn't modify symbol name. - case X86II::MO_GOTOFF: Suffix = "@GOTOFF"; break; - } - - return AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress(), Suffix); -} - MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const { // FIXME: We would like an efficient form for this, so we don't have to do a // lot of extra uniquing. - const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx); + const MCExpr *Expr = 0; + X86MCTargetExpr::VariantKind RefKind = X86MCTargetExpr::Invalid; switch (MO.getTargetFlags()) { default: llvm_unreachable("Unknown target flag on GV operand"); case X86II::MO_NO_FLAG: // No flag. - // These affect the name of the symbol, not any suffix. case X86II::MO_DARWIN_NONLAZY: case X86II::MO_DLLIMPORT: case X86II::MO_DARWIN_STUB: - case X86II::MO_TLSGD: - case X86II::MO_GOTTPOFF: - case X86II::MO_INDNTPOFF: - case X86II::MO_TPOFF: - case X86II::MO_NTPOFF: - case X86II::MO_GOTPCREL: - case X86II::MO_GOT: - case X86II::MO_GOTOFF: - case X86II::MO_PLT: break; + + case X86II::MO_TLSGD: RefKind = X86MCTargetExpr::TLSGD; break; + case X86II::MO_GOTTPOFF: RefKind = X86MCTargetExpr::GOTTPOFF; break; + case X86II::MO_INDNTPOFF: RefKind = X86MCTargetExpr::INDNTPOFF; break; + case X86II::MO_TPOFF: RefKind = X86MCTargetExpr::TPOFF; break; + case X86II::MO_NTPOFF: RefKind = X86MCTargetExpr::NTPOFF; break; + case X86II::MO_GOTPCREL: RefKind = X86MCTargetExpr::GOTPCREL; break; + case X86II::MO_GOT: RefKind = X86MCTargetExpr::GOT; break; + case X86II::MO_GOTOFF: RefKind = X86MCTargetExpr::GOTOFF; break; + case X86II::MO_PLT: RefKind = X86MCTargetExpr::PLT; break; case X86II::MO_PIC_BASE_OFFSET: case X86II::MO_DARWIN_NONLAZY_PIC_BASE: case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: + Expr = MCSymbolRefExpr::Create(Sym, Ctx); // Subtract the pic base. 
Expr = MCBinaryExpr::CreateSub(Expr, MCSymbolRefExpr::Create(GetPICBaseSymbol(), Ctx), @@ -272,6 +164,13 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, break; } + if (Expr == 0) { + if (RefKind == X86MCTargetExpr::Invalid) + Expr = MCSymbolRefExpr::Create(Sym, Ctx); + else + Expr = X86MCTargetExpr::Create(Sym, RefKind, Ctx); + } + if (!MO.isJTI() && MO.getOffset()) Expr = MCBinaryExpr::CreateAdd(Expr, MCConstantExpr::Create(MO.getOffset(), Ctx), @@ -300,6 +199,17 @@ static void lower_lea64_32mem(MCInst *MI, unsigned OpNo) { } } +/// LowerSubReg32_Op0 - Things like MOVZX16rr8 -> MOVZX32rr8. +static void LowerSubReg32_Op0(MCInst &OutMI, unsigned NewOpc) { + OutMI.setOpcode(NewOpc); + lower_subreg32(&OutMI, 0); +} +/// LowerUnaryToTwoAddr - R = setb -> R = sbb R, R +static void LowerUnaryToTwoAddr(MCInst &OutMI, unsigned NewOpc) { + OutMI.setOpcode(NewOpc); + OutMI.addOperand(OutMI.getOperand(0)); + OutMI.addOperand(OutMI.getOperand(0)); +} void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { @@ -323,22 +233,23 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { break; case MachineOperand::MO_MachineBasicBlock: MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create( - AsmPrinter.GetMBBSymbol(MO.getMBB()->getNumber()), Ctx)); + MO.getMBB()->getSymbol(Ctx), Ctx)); break; case MachineOperand::MO_GlobalAddress: - MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO)); + MCOp = LowerSymbolOperand(MO, GetSymbolFromOperand(MO)); break; case MachineOperand::MO_ExternalSymbol: - MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO)); + MCOp = LowerSymbolOperand(MO, GetSymbolFromOperand(MO)); break; case MachineOperand::MO_JumpTableIndex: - MCOp = LowerSymbolOperand(MO, GetJumpTableSymbol(MO)); + MCOp = LowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex())); break; case MachineOperand::MO_ConstantPoolIndex: - MCOp = LowerSymbolOperand(MO, GetConstantPoolIndexSymbol(MO)); + MCOp = LowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex())); break; case MachineOperand::MO_BlockAddress: - MCOp = LowerSymbolOperand(MO, GetBlockAddressSymbol(MO)); + MCOp = LowerSymbolOperand(MO, + AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress())); break; } @@ -350,72 +261,48 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { case X86::LEA64_32r: // Handle 'subreg rewriting' for the lea64_32mem operand. 
lower_lea64_32mem(&OutMI, 1); break; - case X86::MOVZX16rr8: - OutMI.setOpcode(X86::MOVZX32rr8); - lower_subreg32(&OutMI, 0); - break; - case X86::MOVZX16rm8: - OutMI.setOpcode(X86::MOVZX32rm8); - lower_subreg32(&OutMI, 0); - break; - case X86::MOVSX16rr8: - OutMI.setOpcode(X86::MOVSX32rr8); - lower_subreg32(&OutMI, 0); - break; - case X86::MOVSX16rm8: - OutMI.setOpcode(X86::MOVSX32rm8); - lower_subreg32(&OutMI, 0); - break; - case X86::MOVZX64rr32: - OutMI.setOpcode(X86::MOV32rr); - lower_subreg32(&OutMI, 0); - break; - case X86::MOVZX64rm32: - OutMI.setOpcode(X86::MOV32rm); - lower_subreg32(&OutMI, 0); - break; - case X86::MOV64ri64i32: - OutMI.setOpcode(X86::MOV32ri); - lower_subreg32(&OutMI, 0); - break; - case X86::MOVZX64rr8: - OutMI.setOpcode(X86::MOVZX32rr8); - lower_subreg32(&OutMI, 0); - break; - case X86::MOVZX64rm8: - OutMI.setOpcode(X86::MOVZX32rm8); - lower_subreg32(&OutMI, 0); - break; - case X86::MOVZX64rr16: - OutMI.setOpcode(X86::MOVZX32rr16); - lower_subreg32(&OutMI, 0); - break; - case X86::MOVZX64rm16: - OutMI.setOpcode(X86::MOVZX32rm16); - lower_subreg32(&OutMI, 0); - break; + case X86::MOVZX16rr8: LowerSubReg32_Op0(OutMI, X86::MOVZX32rr8); break; + case X86::MOVZX16rm8: LowerSubReg32_Op0(OutMI, X86::MOVZX32rm8); break; + case X86::MOVSX16rr8: LowerSubReg32_Op0(OutMI, X86::MOVSX32rr8); break; + case X86::MOVSX16rm8: LowerSubReg32_Op0(OutMI, X86::MOVSX32rm8); break; + case X86::MOVZX64rr32: LowerSubReg32_Op0(OutMI, X86::MOV32rr); break; + case X86::MOVZX64rm32: LowerSubReg32_Op0(OutMI, X86::MOV32rm); break; + case X86::MOV64ri64i32: LowerSubReg32_Op0(OutMI, X86::MOV32ri); break; + case X86::MOVZX64rr8: LowerSubReg32_Op0(OutMI, X86::MOVZX32rr8); break; + case X86::MOVZX64rm8: LowerSubReg32_Op0(OutMI, X86::MOVZX32rm8); break; + case X86::MOVZX64rr16: LowerSubReg32_Op0(OutMI, X86::MOVZX32rr16); break; + case X86::MOVZX64rm16: LowerSubReg32_Op0(OutMI, X86::MOVZX32rm16); break; + case X86::SETB_C8r: LowerUnaryToTwoAddr(OutMI, X86::SBB8rr); break; + case X86::SETB_C16r: LowerUnaryToTwoAddr(OutMI, X86::SBB16rr); break; + case X86::SETB_C32r: LowerUnaryToTwoAddr(OutMI, X86::SBB32rr); break; + case X86::SETB_C64r: LowerUnaryToTwoAddr(OutMI, X86::SBB64rr); break; + case X86::MOV8r0: LowerUnaryToTwoAddr(OutMI, X86::XOR8rr); break; + case X86::MOV32r0: LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); break; + case X86::MMX_V_SET0: LowerUnaryToTwoAddr(OutMI, X86::MMX_PXORrr); break; + case X86::MMX_V_SETALLONES: + LowerUnaryToTwoAddr(OutMI, X86::MMX_PCMPEQDrr); break; + case X86::FsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break; + case X86::FsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break; + case X86::V_SET0: LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break; + case X86::V_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break; + case X86::MOV16r0: - OutMI.setOpcode(X86::MOV32r0); - lower_subreg32(&OutMI, 0); + LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV16r0 -> MOV32r0 + LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr break; case X86::MOV64r0: - OutMI.setOpcode(X86::MOV32r0); - lower_subreg32(&OutMI, 0); + LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV64r0 -> MOV32r0 + LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr break; } } -void X86AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { +void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { X86MCInstLower MCInstLowering(OutContext, Mang, *this); switch (MI->getOpcode()) { - case TargetInstrInfo::DBG_LABEL: - case TargetInstrInfo::EH_LABEL: - case 
TargetInstrInfo::GC_LABEL: - printLabel(MI); - return; - case TargetInstrInfo::DEBUG_VALUE: { + case TargetOpcode::DBG_VALUE: { // FIXME: if this is implemented for another target before it goes // away completely, the common part should be moved into AsmPrinter. if (!VerboseAsm) @@ -427,9 +314,35 @@ void X86AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { O << V.getName(); O << " <- "; if (NOps==3) { - // Variable is in register - assert(MI->getOperand(0).getType()==MachineOperand::MO_Register); - printOperand(MI, 0); + // Register or immediate value. Register 0 means undef. + assert(MI->getOperand(0).getType()==MachineOperand::MO_Register || + MI->getOperand(0).getType()==MachineOperand::MO_Immediate || + MI->getOperand(0).getType()==MachineOperand::MO_FPImmediate); + if (MI->getOperand(0).getType()==MachineOperand::MO_Register && + MI->getOperand(0).getReg()==0) { + // Suppress offset in this case, it is not meaningful. + O << "undef"; + OutStreamer.AddBlankLine(); + return; + } else if (MI->getOperand(0).getType()==MachineOperand::MO_FPImmediate) { + // This is more naturally done in printOperand, but since the only use + // of such an operand is in this comment and that is temporary (and it's + // ugly), we prefer to keep this localized. + // The include of Type.h may be removable when this code is. + if (MI->getOperand(0).getFPImm()->getType()->isFloatTy() || + MI->getOperand(0).getFPImm()->getType()->isDoubleTy()) + MI->getOperand(0).print(O, &TM); + else { + // There is no good way to print long double. Convert a copy to + // double. Ah well, it's only a comment. + bool ignored; + APFloat APF = APFloat(MI->getOperand(0).getFPImm()->getValueAPF()); + APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, + &ignored); + O << "(long double) " << APF.convertToDouble(); + } + } else + printOperand(MI, 0); } else { // Frame address. Currently handles register +- offset only. assert(MI->getOperand(0).getType()==MachineOperand::MO_Register); @@ -438,17 +351,9 @@ void X86AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { } O << "+"; printOperand(MI, NOps-2); + OutStreamer.AddBlankLine(); return; } - case TargetInstrInfo::INLINEASM: - printInlineAsm(MI); - return; - case TargetInstrInfo::IMPLICIT_DEF: - printImplicitDef(MI); - return; - case TargetInstrInfo::KILL: - printKill(MI); - return; case X86::MOVPC32r: { MCInst TmpInst; // This is a pseudo op for a two instruction sequence with a label, which @@ -464,8 +369,7 @@ void X86AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { // lot of extra uniquing. TmpInst.addOperand(MCOperand::CreateExpr(MCSymbolRefExpr::Create(PICBase, OutContext))); - printMCInst(&TmpInst); - O << '\n'; + OutStreamer.EmitInstruction(TmpInst); // Emit the label. OutStreamer.EmitLabel(PICBase); @@ -473,7 +377,7 @@ void X86AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { // popl $reg TmpInst.setOpcode(X86::POP32r); TmpInst.getOperand(0) = MCOperand::CreateReg(MI->getOperand(0).getReg()); - printMCInst(&TmpInst); + OutStreamer.EmitInstruction(TmpInst); return; } @@ -495,7 +399,7 @@ void X86AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { OutStreamer.EmitLabel(DotSym); // Now that we have emitted the label, lower the complex operand expression. 
- MCSymbol *OpSym = MCInstLowering.GetExternalSymbolSymbol(MI->getOperand(2)); + MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2)); const MCExpr *DotExpr = MCSymbolRefExpr::Create(DotSym, OutContext); const MCExpr *PICBase = @@ -510,7 +414,7 @@ void X86AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg())); TmpInst.addOperand(MCOperand::CreateExpr(DotExpr)); - printMCInst(&TmpInst); + OutStreamer.EmitInstruction(TmpInst); return; } } @@ -518,7 +422,6 @@ void X86AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { MCInst TmpInst; MCInstLowering.Lower(MI, TmpInst); - - printMCInst(&TmpInst); + OutStreamer.EmitInstruction(TmpInst); } diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.h b/lib/Target/X86/AsmPrinter/X86MCInstLower.h index 94f8bfc..ebd23f6 100644 --- a/lib/Target/X86/AsmPrinter/X86MCInstLower.h +++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.h @@ -39,11 +39,7 @@ public: MCSymbol *GetPICBaseSymbol() const; - MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const; - MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const; - MCSymbol *GetJumpTableSymbol(const MachineOperand &MO) const; - MCSymbol *GetConstantPoolIndexSymbol(const MachineOperand &MO) const; - MCSymbol *GetBlockAddressSymbol(const MachineOperand &MO) const; + MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const; MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const; private: diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index 4186fec..61f26a7 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -25,6 +25,8 @@ set(sources X86InstrInfo.cpp X86JITInfo.cpp X86MCAsmInfo.cpp + X86MCCodeEmitter.cpp + X86MCTargetExpr.cpp X86RegisterInfo.cpp X86Subtarget.cpp X86TargetMachine.cpp diff --git a/lib/Target/X86/Disassembler/Makefile b/lib/Target/X86/Disassembler/Makefile index 6c26853..b289647 100644 --- a/lib/Target/X86/Disassembler/Makefile +++ b/lib/Target/X86/Disassembler/Makefile @@ -9,7 +9,6 @@ LEVEL = ../../../.. LIBRARYNAME = LLVMX86Disassembler -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' x86 target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/X86/Makefile b/lib/Target/X86/Makefile index 5e625dc..f4ff894 100644 --- a/lib/Target/X86/Makefile +++ b/lib/Target/X86/Makefile @@ -10,7 +10,6 @@ LEVEL = ../../.. LIBRARYNAME = LLVMX86CodeGen TARGET = X86 -CXXFLAGS = -fno-rtti # Make sure that tblgen is run, first thing. BUILT_SOURCES = X86GenRegisterInfo.h.inc X86GenRegisterNames.inc \ @@ -19,6 +18,7 @@ BUILT_SOURCES = X86GenRegisterInfo.h.inc X86GenRegisterNames.inc \ X86GenAsmWriter1.inc X86GenDAGISel.inc \ X86GenDisassemblerTables.inc X86GenFastISel.inc \ X86GenCallingConv.inc X86GenSubtarget.inc \ + X86GenEDInfo.inc DIRS = AsmPrinter AsmParser Disassembler TargetInfo diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt index 0f3e44b..19eb05e 100644 --- a/lib/Target/X86/README-SSE.txt +++ b/lib/Target/X86/README-SSE.txt @@ -376,7 +376,7 @@ ret ... saving two instructions. The basic idea is that a reload from a spill slot, can, if only one 4-byte -chunk is used, bring in 3 zeros the the one element instead of 4 elements. +chunk is used, bring in 3 zeros the one element instead of 4 elements. 
This can be used to simplify a variety of shuffle operations, where the elements are fixed zeros. @@ -936,3 +936,54 @@ Also, the 'ret's should be shared. This is PR6032. //===----------------------------------------------------------------------===// +These should compile into the same code (PR6214): Perhaps instcombine should +canonicalize the former into the latter? + +define float @foo(float %x) nounwind { + %t = bitcast float %x to i32 + %s = and i32 %t, 2147483647 + %d = bitcast i32 %s to float + ret float %d +} + +declare float @fabsf(float %n) +define float @bar(float %x) nounwind { + %d = call float @fabsf(float %x) + ret float %d +} + +//===----------------------------------------------------------------------===// + +This IR (from PR6194): + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +%0 = type { double, double } +%struct.float3 = type { float, float, float } + +define void @test(%0, %struct.float3* nocapture %res) nounwind noinline ssp { +entry: + %tmp18 = extractvalue %0 %0, 0 ; <double> [#uses=1] + %tmp19 = bitcast double %tmp18 to i64 ; <i64> [#uses=1] + %tmp20 = zext i64 %tmp19 to i128 ; <i128> [#uses=1] + %tmp10 = lshr i128 %tmp20, 32 ; <i128> [#uses=1] + %tmp11 = trunc i128 %tmp10 to i32 ; <i32> [#uses=1] + %tmp12 = bitcast i32 %tmp11 to float ; <float> [#uses=1] + %tmp5 = getelementptr inbounds %struct.float3* %res, i64 0, i32 1 ; <float*> [#uses=1] + store float %tmp12, float* %tmp5 + ret void +} + +Compiles to: + +_test: ## @test + movd %xmm0, %rax + shrq $32, %rax + movl %eax, 4(%rdi) + ret + +This would be better kept in the SSE unit by treating XMM0 as a 4xfloat and +doing a shuffle from v[1] to v[0] then a float store. + +//===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/README-UNIMPLEMENTED.txt b/lib/Target/X86/README-UNIMPLEMENTED.txt index 69dc8ee..c26c75a 100644 --- a/lib/Target/X86/README-UNIMPLEMENTED.txt +++ b/lib/Target/X86/README-UNIMPLEMENTED.txt @@ -11,4 +11,4 @@ which would be great. 2) vector comparisons 3) vector fp<->int conversions: PR2683, PR2684, PR2685, PR2686, PR2688 4) bitcasts from vectors to scalars: PR2804 - +5) llvm.atomic.cmp.swap.i128.p0i128: PR3462 diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt index aa7bb3d..3c6138b 100644 --- a/lib/Target/X86/README.txt +++ b/lib/Target/X86/README.txt @@ -1868,3 +1868,69 @@ carried over to machine instructions. Asm printer (or JIT) can use this information to add the "lock" prefix. //===----------------------------------------------------------------------===// + +_Bool bar(int *x) { return *x & 1; } + +define zeroext i1 @bar(i32* nocapture %x) nounwind readonly { +entry: + %tmp1 = load i32* %x ; <i32> [#uses=1] + %and = and i32 %tmp1, 1 ; <i32> [#uses=1] + %tobool = icmp ne i32 %and, 0 ; <i1> [#uses=1] + ret i1 %tobool +} + +bar: # @bar +# BB#0: # %entry + movl 4(%esp), %eax + movb (%eax), %al + andb $1, %al + movzbl %al, %eax + ret + +Missed optimization: should be movl+andl.
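For reference, the movl+andl sequence this entry is asking for would be something like the following (a hand-written sketch, not actual compiler output):

bar:                                    # @bar
# BB#0:                                 # %entry
        movl    4(%esp), %eax
        movl    (%eax), %eax
        andl    $1, %eax
        ret

Loading the full i32 and masking it in place leaves the zero-extended result directly in %eax, instead of narrowing to %al and re-widening with movzbl.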
+ +//===----------------------------------------------------------------------===// + +Consider the following two functions compiled with clang: +_Bool foo(int *x) { return !(*x & 4); } +unsigned bar(int *x) { return !(*x & 4); } + +foo: + movl 4(%esp), %eax + testb $4, (%eax) + sete %al + movzbl %al, %eax + ret + +bar: + movl 4(%esp), %eax + movl (%eax), %eax + shrl $2, %eax + andl $1, %eax + xorl $1, %eax + ret + +The second function generates more code even though the two functions are +functionally identical. + +//===----------------------------------------------------------------------===// + +Take the following C code: +int x(int y) { return (y & 63) << 14; } + +Code produced by gcc: + andl $63, %edi + sall $14, %edi + movl %edi, %eax + ret + +Code produced by clang: + shll $14, %edi + movl %edi, %eax + andl $1032192, %eax + ret + +The code produced by gcc is 3 bytes shorter. This sort of construct often +shows up with bitfields. + +//===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/TargetInfo/Makefile b/lib/Target/X86/TargetInfo/Makefile index 211607f..9858e6a 100644 --- a/lib/Target/X86/TargetInfo/Makefile +++ b/lib/Target/X86/TargetInfo/Makefile @@ -9,7 +9,6 @@ LEVEL = ../../../.. LIBRARYNAME = LLVMX86Info -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index 684c61f..1a1e447 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -23,6 +23,7 @@ class X86TargetMachine; class FunctionPass; class MachineCodeEmitter; class MCCodeEmitter; +class MCContext; class JITCodeEmitter; class Target; class formatted_raw_ostream; @@ -46,15 +47,13 @@ FunctionPass *createX87FPRegKillInserterPass(); /// createX86CodeEmitterPass - Return a pass that emits the collected X86 code /// to the specified MCE object.
- -FunctionPass *createX86CodeEmitterPass(X86TargetMachine &TM, - MachineCodeEmitter &MCE); FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM, JITCodeEmitter &JCE); -FunctionPass *createX86ObjectCodeEmitterPass(X86TargetMachine &TM, - ObjectCodeEmitter &OCE); -MCCodeEmitter *createX86MCCodeEmitter(const Target &, TargetMachine &TM); +MCCodeEmitter *createX86_32MCCodeEmitter(const Target &, TargetMachine &TM, + MCContext &Ctx); +MCCodeEmitter *createX86_64MCCodeEmitter(const Target &, TargetMachine &TM, + MCContext &Ctx); /// createX86EmitCodeToMemory - Returns a pass that converts a register /// allocated function into raw machine code in a dynamically diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.cpp b/lib/Target/X86/X86COFFMachineModuleInfo.cpp index ea52795..ab67acb 100644 --- a/lib/Target/X86/X86COFFMachineModuleInfo.cpp +++ b/lib/Target/X86/X86COFFMachineModuleInfo.cpp @@ -27,90 +27,55 @@ X86COFFMachineModuleInfo::X86COFFMachineModuleInfo(const MachineModuleInfo &) { X86COFFMachineModuleInfo::~X86COFFMachineModuleInfo() { } -void X86COFFMachineModuleInfo::AddFunctionInfo(const Function *F, - const X86MachineFunctionInfo &Val) { - FunctionInfoMap[F] = Val; +void X86COFFMachineModuleInfo::addExternalFunction(const StringRef& Name) { + CygMingStubs.insert(Name); } - - -static X86MachineFunctionInfo calculateFunctionInfo(const Function *F, - const TargetData &TD) { - X86MachineFunctionInfo Info; - uint64_t Size = 0; - - switch (F->getCallingConv()) { - case CallingConv::X86_StdCall: - Info.setDecorationStyle(StdCall); - break; - case CallingConv::X86_FastCall: - Info.setDecorationStyle(FastCall); - break; - default: - return Info; - } - - unsigned argNum = 1; - for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); - AI != AE; ++AI, ++argNum) { - const Type* Ty = AI->getType(); - - // 'Dereference' type in case of byval parameter attribute - if (F->paramHasAttr(argNum, Attribute::ByVal)) - Ty = cast<PointerType>(Ty)->getElementType(); - - // Size should be aligned to DWORD boundary - Size += ((TD.getTypeAllocSize(Ty) + 3)/4)*4; - } - - // We're not supporting tooooo huge arguments :) - Info.setBytesToPopOnReturn((unsigned int)Size); - return Info; -} - - -/// DecorateCygMingName - Query FunctionInfoMap and use this information for -/// various name decorations for Cygwin and MingW. +/// DecorateCygMingName - Apply various name decorations if the function uses +/// stdcall or fastcall calling convention. void X86COFFMachineModuleInfo::DecorateCygMingName(SmallVectorImpl<char> &Name, const GlobalValue *GV, const TargetData &TD) { const Function *F = dyn_cast<Function>(GV); if (!F) return; - - // Save function name for later type emission. 
- if (F->isDeclaration()) - CygMingStubs.insert(StringRef(Name.data(), Name.size())); - + // We don't want to decorate non-stdcall or non-fastcall functions right now CallingConv::ID CC = F->getCallingConv(); if (CC != CallingConv::X86_StdCall && CC != CallingConv::X86_FastCall) return; - - const X86MachineFunctionInfo *Info; - - FMFInfoMap::const_iterator info_item = FunctionInfoMap.find(F); - if (info_item == FunctionInfoMap.end()) { - // Calculate apropriate function info and populate map - FunctionInfoMap[F] = calculateFunctionInfo(F, TD); - Info = &FunctionInfoMap[F]; - } else { - Info = &info_item->second; - } - - if (Info->getDecorationStyle() == None) return; + + unsigned ArgWords = 0; + DenseMap<const Function*, unsigned>::const_iterator item = FnArgWords.find(F); + if (item == FnArgWords.end()) { + // Calculate arguments sizes + for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); + AI != AE; ++AI) { + const Type* Ty = AI->getType(); + + // 'Dereference' type in case of byval parameter attribute + if (AI->hasByValAttr()) + Ty = cast<PointerType>(Ty)->getElementType(); + + // Size should be aligned to DWORD boundary + ArgWords += ((TD.getTypeAllocSize(Ty) + 3)/4)*4; + } + + FnArgWords[F] = ArgWords; + } else + ArgWords = item->second; + const FunctionType *FT = F->getFunctionType(); - // "Pure" variadic functions do not receive @0 suffix. if (!FT->isVarArg() || FT->getNumParams() == 0 || (FT->getNumParams() == 1 && F->hasStructRetAttr())) - raw_svector_ostream(Name) << '@' << Info->getBytesToPopOnReturn(); - - if (Info->getDecorationStyle() == FastCall) { + raw_svector_ostream(Name) << '@' << ArgWords; + + if (CC == CallingConv::X86_FastCall) { if (Name[0] == '_') Name[0] = '@'; else Name.insert(Name.begin(), '@'); - } + } } /// DecorateCygMingName - Query FunctionInfoMap and use this information for @@ -121,6 +86,6 @@ void X86COFFMachineModuleInfo::DecorateCygMingName(MCSymbol *&Name, const TargetData &TD) { SmallString<128> NameStr(Name->getName().begin(), Name->getName().end()); DecorateCygMingName(NameStr, GV, TD); - + Name = Ctx.GetOrCreateSymbol(NameStr.str()); } diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.h b/lib/Target/X86/X86COFFMachineModuleInfo.h index 0e2009e..9de3dcd 100644 --- a/lib/Target/X86/X86COFFMachineModuleInfo.h +++ b/lib/Target/X86/X86COFFMachineModuleInfo.h @@ -21,44 +21,25 @@ namespace llvm { class X86MachineFunctionInfo; class TargetData; - + /// X86COFFMachineModuleInfo - This is a MachineModuleInfoImpl implementation /// for X86 COFF targets. class X86COFFMachineModuleInfo : public MachineModuleInfoImpl { StringSet<> CygMingStubs; - - // We have to propagate some information about MachineFunction to - // AsmPrinter. It's ok, when we're printing the function, since we have - // access to MachineFunction and can get the appropriate MachineFunctionInfo. - // Unfortunately, this is not possible when we're printing reference to - // Function (e.g. calling it and so on). Even more, there is no way to get the - // corresponding MachineFunctions: it can even be not created at all. That's - // why we should use additional structure, when we're collecting all necessary - // information. - // - // This structure is using e.g. for name decoration for stdcall & fastcall'ed - // function, since we have to use arguments' size for decoration. 
- typedef std::map<const Function*, X86MachineFunctionInfo> FMFInfoMap; - FMFInfoMap FunctionInfoMap; - + DenseMap<const Function*, unsigned> FnArgWords; public: X86COFFMachineModuleInfo(const MachineModuleInfo &); ~X86COFFMachineModuleInfo(); - - + void DecorateCygMingName(MCSymbol* &Name, MCContext &Ctx, const GlobalValue *GV, const TargetData &TD); void DecorateCygMingName(SmallVectorImpl<char> &Name, const GlobalValue *GV, const TargetData &TD); - - void AddFunctionInfo(const Function *F, const X86MachineFunctionInfo &Val); - + void addExternalFunction(const StringRef& Name); typedef StringSet<>::const_iterator stub_iterator; stub_iterator stub_begin() const { return CygMingStubs.begin(); } stub_iterator stub_end() const { return CygMingStubs.end(); } - - }; diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index 828e872..8deadf6 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -21,9 +21,7 @@ #include "X86.h" #include "llvm/LLVMContext.h" #include "llvm/PassManager.h" -#include "llvm/CodeGen/MachineCodeEmitter.h" #include "llvm/CodeGen/JITCodeEmitter.h" -#include "llvm/CodeGen/ObjectCodeEmitter.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineModuleInfo.h" @@ -110,19 +108,10 @@ template<class CodeEmitter> /// createX86CodeEmitterPass - Return a pass that emits the collected X86 code /// to the specified templated MachineCodeEmitter object. - -FunctionPass *llvm::createX86CodeEmitterPass(X86TargetMachine &TM, - MachineCodeEmitter &MCE) { - return new Emitter<MachineCodeEmitter>(TM, MCE); -} FunctionPass *llvm::createX86JITCodeEmitterPass(X86TargetMachine &TM, JITCodeEmitter &JCE) { return new Emitter<JITCodeEmitter>(TM, JCE); } -FunctionPass *llvm::createX86ObjectCodeEmitterPass(X86TargetMachine &TM, - ObjectCodeEmitter &OCE) { - return new Emitter<ObjectCodeEmitter>(TM, OCE); -} template<class CodeEmitter> bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) { @@ -202,8 +191,15 @@ template<class CodeEmitter> void Emitter<CodeEmitter>::emitExternalSymbolAddress(const char *ES, unsigned Reloc) { intptr_t RelocCST = (Reloc == X86::reloc_picrel_word) ? PICBaseOffset : 0; + + // X86 never needs stubs because instruction selection will always pick + // an instruction sequence that is large enough to hold any address + // to a symbol. + // (see X86ISelLowering.cpp, near 2039: X86TargetLowering::LowerCall) + bool NeedStub = false; MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(), - Reloc, ES, RelocCST)); + Reloc, ES, RelocCST, + 0, NeedStub)); if (Reloc == X86::reloc_absolute_dword) MCE.emitDWordLE(0); else @@ -253,7 +249,7 @@ void Emitter<CodeEmitter>::emitJumpTableAddress(unsigned JTI, unsigned Reloc, template<class CodeEmitter> unsigned Emitter<CodeEmitter>::getX86RegNum(unsigned RegNo) const { - return II->getRegisterInfo().getX86RegNum(RegNo); + return X86RegisterInfo::getX86RegNum(RegNo); } inline static unsigned char ModRMByte(unsigned Mod, unsigned RegOpcode, @@ -391,86 +387,103 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI, // If no BaseReg, issue a RIP relative instruction only if the MCE can // resolve addresses on-the-fly, otherwise use SIB (Intel Manual 2A, table // 2-7) and absolute references. 
- if ((!Is64BitMode || DispForReloc || BaseReg != 0) && + unsigned BaseRegNo = -1U; + if (BaseReg != 0 && BaseReg != X86::RIP) + BaseRegNo = getX86RegNum(BaseReg); + + if (// The SIB byte must be used if there is an index register. IndexReg.getReg() == 0 && - ((BaseReg == 0 && MCE.earlyResolveAddresses()) || BaseReg == X86::RIP || - (BaseReg != 0 && getX86RegNum(BaseReg) != N86::ESP))) { - if (BaseReg == 0 || BaseReg == X86::RIP) { // Just a displacement? - // Emit special case [disp32] encoding + // The SIB byte must be used if the base is ESP/RSP/R12, all of which + // encode to an R/M value of 4, which indicates that a SIB byte is + // present. + BaseRegNo != N86::ESP && + // If there is no base register and we're in 64-bit mode, we need a SIB + // byte to emit an addr that is just 'disp32' (the non-RIP relative form). + (!Is64BitMode || BaseReg != 0)) { + if (BaseReg == 0 || // [disp32] in X86-32 mode + BaseReg == X86::RIP) { // [disp32+RIP] in X86-64 mode MCE.emitByte(ModRMByte(0, RegOpcodeField, 5)); emitDisplacementField(DispForReloc, DispVal, PCAdj, true); - } else { - unsigned BaseRegNo = getX86RegNum(BaseReg); - if (!DispForReloc && DispVal == 0 && BaseRegNo != N86::EBP) { - // Emit simple indirect register encoding... [EAX] f.e. - MCE.emitByte(ModRMByte(0, RegOpcodeField, BaseRegNo)); - } else if (!DispForReloc && isDisp8(DispVal)) { - // Emit the disp8 encoding... [REG+disp8] - MCE.emitByte(ModRMByte(1, RegOpcodeField, BaseRegNo)); - emitConstant(DispVal, 1); - } else { - // Emit the most general non-SIB encoding: [REG+disp32] - MCE.emitByte(ModRMByte(2, RegOpcodeField, BaseRegNo)); - emitDisplacementField(DispForReloc, DispVal, PCAdj, IsPCRel); - } + return; } - - } else { // We need a SIB byte, so start by outputting the ModR/M byte first - assert(IndexReg.getReg() != X86::ESP && - IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!"); - - bool ForceDisp32 = false; - bool ForceDisp8 = false; - if (BaseReg == 0) { - // If there is no base register, we emit the special case SIB byte with - // MOD=0, BASE=5, to JUST get the index, scale, and displacement. - MCE.emitByte(ModRMByte(0, RegOpcodeField, 4)); - ForceDisp32 = true; - } else if (DispForReloc) { - // Emit the normal disp32 encoding. - MCE.emitByte(ModRMByte(2, RegOpcodeField, 4)); - ForceDisp32 = true; - } else if (DispVal == 0 && getX86RegNum(BaseReg) != N86::EBP) { - // Emit no displacement ModR/M byte - MCE.emitByte(ModRMByte(0, RegOpcodeField, 4)); - } else if (isDisp8(DispVal)) { - // Emit the disp8 encoding... - MCE.emitByte(ModRMByte(1, RegOpcodeField, 4)); - ForceDisp8 = true; // Make sure to force 8 bit disp if Base=EBP - } else { - // Emit the normal disp32 encoding... - MCE.emitByte(ModRMByte(2, RegOpcodeField, 4)); - } - - // Calculate what the SS field value should be... - static const unsigned SSTable[] = { ~0, 0, 1, ~0, 2, ~0, ~0, ~0, 3 }; - unsigned SS = SSTable[Scale.getImm()]; - - if (BaseReg == 0) { - // Handle the SIB byte for the case where there is no base, see Intel - // Manual 2A, table 2-7. The displacement has already been output. 
- unsigned IndexRegNo; - if (IndexReg.getReg()) - IndexRegNo = getX86RegNum(IndexReg.getReg()); - else // Examples: [ESP+1*<noreg>+4] or [scaled idx]+disp32 (MOD=0,BASE=5) - IndexRegNo = 4; - emitSIBByte(SS, IndexRegNo, 5); - } else { - unsigned BaseRegNo = getX86RegNum(BaseReg); - unsigned IndexRegNo; - if (IndexReg.getReg()) - IndexRegNo = getX86RegNum(IndexReg.getReg()); - else - IndexRegNo = 4; // For example [ESP+1*<noreg>+4] - emitSIBByte(SS, IndexRegNo, BaseRegNo); + + // If the base is not EBP/ESP and there is no displacement, use simple + // indirect register encoding, this handles addresses like [EAX]. The + // encoding for [EBP] with no displacement means [disp32] so we handle it + // by emitting a displacement of 0 below. + if (!DispForReloc && DispVal == 0 && BaseRegNo != N86::EBP) { + MCE.emitByte(ModRMByte(0, RegOpcodeField, BaseRegNo)); + return; } - - // Do we need to output a displacement? - if (ForceDisp8) { + + // Otherwise, if the displacement fits in a byte, encode as [REG+disp8]. + if (!DispForReloc && isDisp8(DispVal)) { + MCE.emitByte(ModRMByte(1, RegOpcodeField, BaseRegNo)); emitConstant(DispVal, 1); - } else if (DispVal != 0 || ForceDisp32) { - emitDisplacementField(DispForReloc, DispVal, PCAdj, IsPCRel); + return; } + + // Otherwise, emit the most general non-SIB encoding: [REG+disp32] + MCE.emitByte(ModRMByte(2, RegOpcodeField, BaseRegNo)); + emitDisplacementField(DispForReloc, DispVal, PCAdj, IsPCRel); + return; + } + + // Otherwise we need a SIB byte, so start by outputting the ModR/M byte first. + assert(IndexReg.getReg() != X86::ESP && + IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!"); + + bool ForceDisp32 = false; + bool ForceDisp8 = false; + if (BaseReg == 0) { + // If there is no base register, we emit the special case SIB byte with + // MOD=0, BASE=5, to JUST get the index, scale, and displacement. + MCE.emitByte(ModRMByte(0, RegOpcodeField, 4)); + ForceDisp32 = true; + } else if (DispForReloc) { + // Emit the normal disp32 encoding. + MCE.emitByte(ModRMByte(2, RegOpcodeField, 4)); + ForceDisp32 = true; + } else if (DispVal == 0 && getX86RegNum(BaseReg) != N86::EBP) { + // Emit no displacement ModR/M byte + MCE.emitByte(ModRMByte(0, RegOpcodeField, 4)); + } else if (isDisp8(DispVal)) { + // Emit the disp8 encoding... + MCE.emitByte(ModRMByte(1, RegOpcodeField, 4)); + ForceDisp8 = true; // Make sure to force 8 bit disp if Base=EBP + } else { + // Emit the normal disp32 encoding... + MCE.emitByte(ModRMByte(2, RegOpcodeField, 4)); + } + + // Calculate what the SS field value should be... + static const unsigned SSTable[] = { ~0, 0, 1, ~0, 2, ~0, ~0, ~0, 3 }; + unsigned SS = SSTable[Scale.getImm()]; + + if (BaseReg == 0) { + // Handle the SIB byte for the case where there is no base, see Intel + // Manual 2A, table 2-7. The displacement has already been output. + unsigned IndexRegNo; + if (IndexReg.getReg()) + IndexRegNo = getX86RegNum(IndexReg.getReg()); + else // Examples: [ESP+1*<noreg>+4] or [scaled idx]+disp32 (MOD=0,BASE=5) + IndexRegNo = 4; + emitSIBByte(SS, IndexRegNo, 5); + } else { + unsigned BaseRegNo = getX86RegNum(BaseReg); + unsigned IndexRegNo; + if (IndexReg.getReg()) + IndexRegNo = getX86RegNum(IndexReg.getReg()); + else + IndexRegNo = 4; // For example [ESP+1*<noreg>+4] + emitSIBByte(SS, IndexRegNo, BaseRegNo); + } + + // Do we need to output a displacement?
+ if (ForceDisp8) { + emitConstant(DispVal, 1); + } else if (DispVal != 0 || ForceDisp32) { + emitDisplacementField(DispForReloc, DispVal, PCAdj, IsPCRel); } } @@ -570,7 +583,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32 --NumOps; - unsigned char BaseOpcode = II->getBaseOpcodeFor(Desc); + unsigned char BaseOpcode = X86II::getBaseOpcodeFor(Desc->TSFlags); switch (Desc->TSFlags & X86II::FormMask) { default: llvm_unreachable("Unknown FormMask value in X86 MachineCodeEmitter!"); @@ -582,25 +595,25 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, llvm_unreachable("pseudo instructions should be removed before code" " emission"); break; - case TargetInstrInfo::INLINEASM: + case TargetOpcode::INLINEASM: // We allow inline assembler nodes with empty bodies - they can // implicitly define registers, which is ok for JIT. if (MI.getOperand(0).getSymbolName()[0]) llvm_report_error("JIT does not support inline asm!"); break; - case TargetInstrInfo::DBG_LABEL: - case TargetInstrInfo::EH_LABEL: - case TargetInstrInfo::GC_LABEL: + case TargetOpcode::DBG_LABEL: + case TargetOpcode::EH_LABEL: + case TargetOpcode::GC_LABEL: MCE.emitLabel(MI.getOperand(0).getImm()); break; - case TargetInstrInfo::IMPLICIT_DEF: - case TargetInstrInfo::KILL: + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::KILL: case X86::FP_REG_KILL: break; case X86::MOVPC32r: { // This emits the "call" portion of this pseudo instruction. MCE.emitByte(BaseOpcode); - emitConstant(0, X86InstrInfo::sizeOfImm(Desc)); + emitConstant(0, X86II::getSizeOfImm(Desc->TSFlags)); // Remember PIC base. PICBaseOffset = (intptr_t) MCE.getCurrentPCOffset(); X86JITInfo *JTI = TM.getJITInfo(); @@ -639,15 +652,21 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, emitExternalSymbolAddress(MO.getSymbolName(), X86::reloc_pcrel_word); break; } + + // FIXME: Only used by hackish MCCodeEmitter, remove when dead. + if (MO.isJTI()) { + emitJumpTableAddress(MO.getIndex(), X86::reloc_pcrel_word); + break; + } assert(MO.isImm() && "Unknown RawFrm operand!"); if (Opcode == X86::CALLpcrel32 || Opcode == X86::CALL64pcrel32) { // Fix up immediate operand for pc relative calls.
intptr_t Imm = (intptr_t)MO.getImm(); Imm = Imm - MCE.getCurrentPCValue() - 4; - emitConstant(Imm, X86InstrInfo::sizeOfImm(Desc)); + emitConstant(Imm, X86II::getSizeOfImm(Desc->TSFlags)); } else - emitConstant(MO.getImm(), X86InstrInfo::sizeOfImm(Desc)); + emitConstant(MO.getImm(), X86II::getSizeOfImm(Desc->TSFlags)); break; } @@ -658,7 +677,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, break; const MachineOperand &MO1 = MI.getOperand(CurOp++); - unsigned Size = X86InstrInfo::sizeOfImm(Desc); + unsigned Size = X86II::getSizeOfImm(Desc->TSFlags); if (MO1.isImm()) { emitConstant(MO1.getImm(), Size); break; @@ -691,7 +710,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, CurOp += 2; if (CurOp != NumOps) emitConstant(MI.getOperand(CurOp++).getImm(), - X86InstrInfo::sizeOfImm(Desc)); + X86II::getSizeOfImm(Desc->TSFlags)); break; } case X86II::MRMDestMem: { @@ -702,7 +721,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, CurOp += X86AddrNumOperands + 1; if (CurOp != NumOps) emitConstant(MI.getOperand(CurOp++).getImm(), - X86InstrInfo::sizeOfImm(Desc)); + X86II::getSizeOfImm(Desc->TSFlags)); break; } @@ -713,7 +732,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, CurOp += 2; if (CurOp != NumOps) emitConstant(MI.getOperand(CurOp++).getImm(), - X86InstrInfo::sizeOfImm(Desc)); + X86II::getSizeOfImm(Desc->TSFlags)); break; case X86II::MRMSrcMem: { @@ -726,7 +745,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, AddrOperands = X86AddrNumOperands; intptr_t PCAdj = (CurOp + AddrOperands + 1 != NumOps) ? - X86InstrInfo::sizeOfImm(Desc) : 0; + X86II::getSizeOfImm(Desc->TSFlags) : 0; MCE.emitByte(BaseOpcode); emitMemModRMByte(MI, CurOp+1, getX86RegNum(MI.getOperand(CurOp).getReg()), @@ -734,7 +753,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, CurOp += AddrOperands + 1; if (CurOp != NumOps) emitConstant(MI.getOperand(CurOp++).getImm(), - X86InstrInfo::sizeOfImm(Desc)); + X86II::getSizeOfImm(Desc->TSFlags)); break; } @@ -743,33 +762,14 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, case X86II::MRM4r: case X86II::MRM5r: case X86II::MRM6r: case X86II::MRM7r: { MCE.emitByte(BaseOpcode); - - // Special handling of lfence, mfence, monitor, and mwait. - if (Desc->getOpcode() == X86::LFENCE || - Desc->getOpcode() == X86::MFENCE || - Desc->getOpcode() == X86::MONITOR || - Desc->getOpcode() == X86::MWAIT) { - emitRegModRMByte((Desc->TSFlags & X86II::FormMask)-X86II::MRM0r); - - switch (Desc->getOpcode()) { - default: break; - case X86::MONITOR: - MCE.emitByte(0xC8); - break; - case X86::MWAIT: - MCE.emitByte(0xC9); - break; - } - } else { - emitRegModRMByte(MI.getOperand(CurOp++).getReg(), - (Desc->TSFlags & X86II::FormMask)-X86II::MRM0r); - } + emitRegModRMByte(MI.getOperand(CurOp++).getReg(), + (Desc->TSFlags & X86II::FormMask)-X86II::MRM0r); if (CurOp == NumOps) break; const MachineOperand &MO1 = MI.getOperand(CurOp++); - unsigned Size = X86InstrInfo::sizeOfImm(Desc); + unsigned Size = X86II::getSizeOfImm(Desc->TSFlags); if (MO1.isImm()) { emitConstant(MO1.getImm(), Size); break; @@ -798,7 +798,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, case X86II::MRM6m: case X86II::MRM7m: { intptr_t PCAdj = (CurOp + X86AddrNumOperands != NumOps) ? (MI.getOperand(CurOp+X86AddrNumOperands).isImm() ? 
- X86InstrInfo::sizeOfImm(Desc) : 4) : 0; + X86II::getSizeOfImm(Desc->TSFlags) : 4) : 0; MCE.emitByte(BaseOpcode); emitMemModRMByte(MI, CurOp, (Desc->TSFlags & X86II::FormMask)-X86II::MRM0m, @@ -809,7 +809,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, break; const MachineOperand &MO = MI.getOperand(CurOp++); - unsigned Size = X86InstrInfo::sizeOfImm(Desc); + unsigned Size = X86II::getSizeOfImm(Desc->TSFlags); if (MO.isImm()) { emitConstant(MO.getImm(), Size); break; @@ -839,6 +839,27 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, getX86RegNum(MI.getOperand(CurOp).getReg())); ++CurOp; break; + + case X86II::MRM_C1: + MCE.emitByte(BaseOpcode); + MCE.emitByte(0xC1); + break; + case X86II::MRM_C8: + MCE.emitByte(BaseOpcode); + MCE.emitByte(0xC8); + break; + case X86II::MRM_C9: + MCE.emitByte(BaseOpcode); + MCE.emitByte(0xC9); + break; + case X86II::MRM_E8: + MCE.emitByte(BaseOpcode); + MCE.emitByte(0xE8); + break; + case X86II::MRM_F0: + MCE.emitByte(BaseOpcode); + MCE.emitByte(0xF0); + break; } if (!Desc->isVariadic() && CurOp != NumOps) { @@ -850,256 +871,3 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, MCE.processDebugLoc(MI.getDebugLoc(), false); } - -// Adapt the Emitter / CodeEmitter interfaces to MCCodeEmitter. -// -// FIXME: This is a total hack designed to allow work on llvm-mc to proceed -// without being blocked on various cleanups needed to support a clean interface -// to instruction encoding. -// -// Look away! - -#include "llvm/DerivedTypes.h" - -namespace { -class MCSingleInstructionCodeEmitter : public MachineCodeEmitter { - uint8_t Data[256]; - -public: - MCSingleInstructionCodeEmitter() { reset(); } - - void reset() { - BufferBegin = Data; - BufferEnd = array_endof(Data); - CurBufferPtr = Data; - } - - StringRef str() { - return StringRef(reinterpret_cast<char*>(BufferBegin), - CurBufferPtr - BufferBegin); - } - - virtual void startFunction(MachineFunction &F) {} - virtual bool finishFunction(MachineFunction &F) { return false; } - virtual void emitLabel(uint64_t LabelID) {} - virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) {} - virtual bool earlyResolveAddresses() const { return false; } - virtual void addRelocation(const MachineRelocation &MR) { } - virtual uintptr_t getConstantPoolEntryAddress(unsigned Index) const { - return 0; - } - virtual uintptr_t getJumpTableEntryAddress(unsigned Index) const { - return 0; - } - virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const { - return 0; - } - virtual uintptr_t getLabelAddress(uint64_t LabelID) const { - return 0; - } - virtual void setModuleInfo(MachineModuleInfo* Info) {} -}; - -class X86MCCodeEmitter : public MCCodeEmitter { - X86MCCodeEmitter(const X86MCCodeEmitter &); // DO NOT IMPLEMENT - void operator=(const X86MCCodeEmitter &); // DO NOT IMPLEMENT - -private: - X86TargetMachine &TM; - llvm::Function *DummyF; - TargetData *DummyTD; - mutable llvm::MachineFunction *DummyMF; - llvm::MachineBasicBlock *DummyMBB; - - MCSingleInstructionCodeEmitter *InstrEmitter; - Emitter<MachineCodeEmitter> *Emit; - -public: - X86MCCodeEmitter(X86TargetMachine &_TM) : TM(_TM) { - // Verily, thou shouldst avert thine eyes. 
- const llvm::FunctionType *FTy = - FunctionType::get(llvm::Type::getVoidTy(getGlobalContext()), false); - DummyF = Function::Create(FTy, GlobalValue::InternalLinkage); - DummyTD = new TargetData(""); - DummyMF = new MachineFunction(DummyF, TM); - DummyMBB = DummyMF->CreateMachineBasicBlock(); - - InstrEmitter = new MCSingleInstructionCodeEmitter(); - Emit = new Emitter<MachineCodeEmitter>(TM, *InstrEmitter, - *TM.getInstrInfo(), - *DummyTD, false); - } - ~X86MCCodeEmitter() { - delete Emit; - delete InstrEmitter; - delete DummyMF; - delete DummyF; - } - - bool AddRegToInstr(const MCInst &MI, MachineInstr *Instr, - unsigned Start) const { - if (Start + 1 > MI.getNumOperands()) - return false; - - const MCOperand &Op = MI.getOperand(Start); - if (!Op.isReg()) return false; - - Instr->addOperand(MachineOperand::CreateReg(Op.getReg(), false)); - return true; - } - - bool AddImmToInstr(const MCInst &MI, MachineInstr *Instr, - unsigned Start) const { - if (Start + 1 > MI.getNumOperands()) - return false; - - const MCOperand &Op = MI.getOperand(Start); - if (Op.isImm()) { - Instr->addOperand(MachineOperand::CreateImm(Op.getImm())); - return true; - } - if (!Op.isExpr()) - return false; - - const MCExpr *Expr = Op.getExpr(); - if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) { - Instr->addOperand(MachineOperand::CreateImm(CE->getValue())); - return true; - } - - // FIXME: Relocation / fixup. - Instr->addOperand(MachineOperand::CreateImm(0)); - return true; - } - - bool AddLMemToInstr(const MCInst &MI, MachineInstr *Instr, - unsigned Start) const { - return (AddRegToInstr(MI, Instr, Start + 0) && - AddImmToInstr(MI, Instr, Start + 1) && - AddRegToInstr(MI, Instr, Start + 2) && - AddImmToInstr(MI, Instr, Start + 3)); - } - - bool AddMemToInstr(const MCInst &MI, MachineInstr *Instr, - unsigned Start) const { - return (AddRegToInstr(MI, Instr, Start + 0) && - AddImmToInstr(MI, Instr, Start + 1) && - AddRegToInstr(MI, Instr, Start + 2) && - AddImmToInstr(MI, Instr, Start + 3) && - AddRegToInstr(MI, Instr, Start + 4)); - } - - void EncodeInstruction(const MCInst &MI, raw_ostream &OS) const { - // Don't look yet! - - // Convert the MCInst to a MachineInstr so we can (ab)use the regular - // emitter. - const X86InstrInfo &II = *TM.getInstrInfo(); - const TargetInstrDesc &Desc = II.get(MI.getOpcode()); - MachineInstr *Instr = DummyMF->CreateMachineInstr(Desc, DebugLoc()); - DummyMBB->push_back(Instr); - - unsigned Opcode = MI.getOpcode(); - unsigned NumOps = MI.getNumOperands(); - unsigned CurOp = 0; - if (NumOps > 1 && Desc.getOperandConstraint(1, TOI::TIED_TO) != -1) { - Instr->addOperand(MachineOperand::CreateReg(0, false)); - ++CurOp; - } else if (NumOps > 2 && - Desc.getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0) - // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32 - --NumOps; - - bool OK = true; - switch (Desc.TSFlags & X86II::FormMask) { - case X86II::MRMDestReg: - case X86II::MRMSrcReg: - // Matching doesn't fill this in completely, we have to choose operand 0 - // for a tied register. - OK &= AddRegToInstr(MI, Instr, 0); CurOp++; - OK &= AddRegToInstr(MI, Instr, CurOp++); - if (CurOp < NumOps) - OK &= AddImmToInstr(MI, Instr, CurOp); - break; - - case X86II::RawFrm: - if (CurOp < NumOps) { - // Hack to make branches work. 
- if (!(Desc.TSFlags & X86II::ImmMask) && - MI.getOperand(0).isExpr() && - isa<MCSymbolRefExpr>(MI.getOperand(0).getExpr())) - Instr->addOperand(MachineOperand::CreateMBB(DummyMBB)); - else - OK &= AddImmToInstr(MI, Instr, CurOp); - } - break; - - case X86II::AddRegFrm: - OK &= AddRegToInstr(MI, Instr, CurOp++); - if (CurOp < NumOps) - OK &= AddImmToInstr(MI, Instr, CurOp); - break; - - case X86II::MRM0r: case X86II::MRM1r: - case X86II::MRM2r: case X86II::MRM3r: - case X86II::MRM4r: case X86II::MRM5r: - case X86II::MRM6r: case X86II::MRM7r: - // Matching doesn't fill this in completely, we have to choose operand 0 - // for a tied register. - OK &= AddRegToInstr(MI, Instr, 0); CurOp++; - if (CurOp < NumOps) - OK &= AddImmToInstr(MI, Instr, CurOp); - break; - - case X86II::MRM0m: case X86II::MRM1m: - case X86II::MRM2m: case X86II::MRM3m: - case X86II::MRM4m: case X86II::MRM5m: - case X86II::MRM6m: case X86II::MRM7m: - OK &= AddMemToInstr(MI, Instr, CurOp); CurOp += 5; - if (CurOp < NumOps) - OK &= AddImmToInstr(MI, Instr, CurOp); - break; - - case X86II::MRMSrcMem: - OK &= AddRegToInstr(MI, Instr, CurOp++); - if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r || - Opcode == X86::LEA16r || Opcode == X86::LEA32r) - OK &= AddLMemToInstr(MI, Instr, CurOp); - else - OK &= AddMemToInstr(MI, Instr, CurOp); - break; - - case X86II::MRMDestMem: - OK &= AddMemToInstr(MI, Instr, CurOp); CurOp += 5; - OK &= AddRegToInstr(MI, Instr, CurOp); - break; - - default: - case X86II::MRMInitReg: - case X86II::Pseudo: - OK = false; - break; - } - - if (!OK) { - dbgs() << "couldn't convert inst '"; - MI.dump(); - dbgs() << "' to machine instr:\n"; - Instr->dump(); - } - - InstrEmitter->reset(); - if (OK) - Emit->emitInstruction(*Instr, &Desc); - OS << InstrEmitter->str(); - - Instr->eraseFromParent(); - } -}; -} - -// Ok, now you can look. -MCCodeEmitter *llvm::createX86MCCodeEmitter(const Target &, - TargetMachine &TM) { - return new X86MCCodeEmitter(static_cast<X86TargetMachine&>(TM)); -} diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index d5ad61b..69a9d60 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -786,8 +786,8 @@ bool X86FastISel::X86SelectCmp(Instruction *I) { bool X86FastISel::X86SelectZExt(Instruction *I) { // Handle zero-extension from i1 to i8, which is common. - if (I->getType()->isInteger(8) && - I->getOperand(0)->getType()->isInteger(1)) { + if (I->getType()->isIntegerTy(8) && + I->getOperand(0)->getType()->isIntegerTy(1)) { unsigned ResultReg = getRegForValue(I->getOperand(0)); if (ResultReg == 0) return false; // Set the high bits to zero. 
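The emitMemModRMByte rewrite in X86CodeEmitter.cpp above is a straight-line restatement of the x86 addressing-mode rules: an R/M value of 4 always means a SIB byte follows, mod=0 with base EBP is reserved for [disp32], and otherwise mod picks between no displacement, disp8, and disp32. A minimal standalone sketch of that mod-field decision, with an illustrative helper name rather than the emitter's real interface:

#include <cstdint>

// Pick the ModRM 'mod' field (0, 1 or 2) for a non-SIB [base+disp] operand,
// mirroring the early-return chain in emitMemModRMByte. N86-style register
// numbers: 4 = ESP (always forces a SIB byte), 5 = EBP (mod=0 would mean
// [disp32], so [EBP] must carry an explicit displacement).
static unsigned pickModField(unsigned BaseRegNo, int32_t Disp, bool DispForReloc) {
  if (DispForReloc)
    return 2;                        // A relocated displacement is always 32 bits.
  if (Disp == 0 && BaseRegNo != 5)
    return 0;                        // [REG] with no displacement byte at all.
  if (Disp >= -128 && Disp <= 127)
    return 1;                        // [REG+disp8]
  return 2;                          // [REG+disp32]
}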
@@ -828,30 +828,30 @@ bool X86FastISel::X86SelectBranch(Instruction *I) { std::swap(TrueMBB, FalseMBB); Predicate = CmpInst::FCMP_UNE; // FALL THROUGH - case CmpInst::FCMP_UNE: SwapArgs = false; BranchOpc = X86::JNE; break; - case CmpInst::FCMP_OGT: SwapArgs = false; BranchOpc = X86::JA; break; - case CmpInst::FCMP_OGE: SwapArgs = false; BranchOpc = X86::JAE; break; - case CmpInst::FCMP_OLT: SwapArgs = true; BranchOpc = X86::JA; break; - case CmpInst::FCMP_OLE: SwapArgs = true; BranchOpc = X86::JAE; break; - case CmpInst::FCMP_ONE: SwapArgs = false; BranchOpc = X86::JNE; break; - case CmpInst::FCMP_ORD: SwapArgs = false; BranchOpc = X86::JNP; break; - case CmpInst::FCMP_UNO: SwapArgs = false; BranchOpc = X86::JP; break; - case CmpInst::FCMP_UEQ: SwapArgs = false; BranchOpc = X86::JE; break; - case CmpInst::FCMP_UGT: SwapArgs = true; BranchOpc = X86::JB; break; - case CmpInst::FCMP_UGE: SwapArgs = true; BranchOpc = X86::JBE; break; - case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB; break; - case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE; break; + case CmpInst::FCMP_UNE: SwapArgs = false; BranchOpc = X86::JNE_4; break; + case CmpInst::FCMP_OGT: SwapArgs = false; BranchOpc = X86::JA_4; break; + case CmpInst::FCMP_OGE: SwapArgs = false; BranchOpc = X86::JAE_4; break; + case CmpInst::FCMP_OLT: SwapArgs = true; BranchOpc = X86::JA_4; break; + case CmpInst::FCMP_OLE: SwapArgs = true; BranchOpc = X86::JAE_4; break; + case CmpInst::FCMP_ONE: SwapArgs = false; BranchOpc = X86::JNE_4; break; + case CmpInst::FCMP_ORD: SwapArgs = false; BranchOpc = X86::JNP_4; break; + case CmpInst::FCMP_UNO: SwapArgs = false; BranchOpc = X86::JP_4; break; + case CmpInst::FCMP_UEQ: SwapArgs = false; BranchOpc = X86::JE_4; break; + case CmpInst::FCMP_UGT: SwapArgs = true; BranchOpc = X86::JB_4; break; + case CmpInst::FCMP_UGE: SwapArgs = true; BranchOpc = X86::JBE_4; break; + case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4; break; + case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break; - case CmpInst::ICMP_EQ: SwapArgs = false; BranchOpc = X86::JE; break; - case CmpInst::ICMP_NE: SwapArgs = false; BranchOpc = X86::JNE; break; - case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA; break; - case CmpInst::ICMP_UGE: SwapArgs = false; BranchOpc = X86::JAE; break; - case CmpInst::ICMP_ULT: SwapArgs = false; BranchOpc = X86::JB; break; - case CmpInst::ICMP_ULE: SwapArgs = false; BranchOpc = X86::JBE; break; - case CmpInst::ICMP_SGT: SwapArgs = false; BranchOpc = X86::JG; break; - case CmpInst::ICMP_SGE: SwapArgs = false; BranchOpc = X86::JGE; break; - case CmpInst::ICMP_SLT: SwapArgs = false; BranchOpc = X86::JL; break; - case CmpInst::ICMP_SLE: SwapArgs = false; BranchOpc = X86::JLE; break; + case CmpInst::ICMP_EQ: SwapArgs = false; BranchOpc = X86::JE_4; break; + case CmpInst::ICMP_NE: SwapArgs = false; BranchOpc = X86::JNE_4; break; + case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA_4; break; + case CmpInst::ICMP_UGE: SwapArgs = false; BranchOpc = X86::JAE_4; break; + case CmpInst::ICMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4; break; + case CmpInst::ICMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break; + case CmpInst::ICMP_SGT: SwapArgs = false; BranchOpc = X86::JG_4; break; + case CmpInst::ICMP_SGE: SwapArgs = false; BranchOpc = X86::JGE_4; break; + case CmpInst::ICMP_SLT: SwapArgs = false; BranchOpc = X86::JL_4; break; + case CmpInst::ICMP_SLE: SwapArgs = false; BranchOpc = X86::JLE_4; break; default: return false; } @@ 
-869,7 +869,7 @@ bool X86FastISel::X86SelectBranch(Instruction *I) { if (Predicate == CmpInst::FCMP_UNE) { // X86 requires a second branch to handle UNE (and OEQ, // which is mapped to UNE above). - BuildMI(MBB, DL, TII.get(X86::JP)).addMBB(TrueMBB); + BuildMI(MBB, DL, TII.get(X86::JP_4)).addMBB(TrueMBB); } FastEmitBranch(FalseMBB); @@ -923,7 +923,8 @@ bool X86FastISel::X86SelectBranch(Instruction *I) { unsigned OpCode = SetMI->getOpcode(); if (OpCode == X86::SETOr || OpCode == X86::SETBr) { - BuildMI(MBB, DL, TII.get(OpCode == X86::SETOr ? X86::JO : X86::JB)) + BuildMI(MBB, DL, TII.get(OpCode == X86::SETOr ? + X86::JO_4 : X86::JB_4)) .addMBB(TrueMBB); FastEmitBranch(FalseMBB); MBB->addSuccessor(TrueMBB); @@ -939,7 +940,7 @@ bool X86FastISel::X86SelectBranch(Instruction *I) { if (OpReg == 0) return false; BuildMI(MBB, DL, TII.get(X86::TEST8rr)).addReg(OpReg).addReg(OpReg); - BuildMI(MBB, DL, TII.get(X86::JNE)).addMBB(TrueMBB); + BuildMI(MBB, DL, TII.get(X86::JNE_4)).addMBB(TrueMBB); FastEmitBranch(FalseMBB); MBB->addSuccessor(TrueMBB); return true; @@ -948,7 +949,7 @@ bool X86FastISel::X86SelectBranch(Instruction *I) { bool X86FastISel::X86SelectShift(Instruction *I) { unsigned CReg = 0, OpReg = 0, OpImm = 0; const TargetRegisterClass *RC = NULL; - if (I->getType()->isInteger(8)) { + if (I->getType()->isIntegerTy(8)) { CReg = X86::CL; RC = &X86::GR8RegClass; switch (I->getOpcode()) { @@ -957,7 +958,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) { case Instruction::Shl: OpReg = X86::SHL8rCL; OpImm = X86::SHL8ri; break; default: return false; } - } else if (I->getType()->isInteger(16)) { + } else if (I->getType()->isIntegerTy(16)) { CReg = X86::CX; RC = &X86::GR16RegClass; switch (I->getOpcode()) { @@ -966,7 +967,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) { case Instruction::Shl: OpReg = X86::SHL16rCL; OpImm = X86::SHL16ri; break; default: return false; } - } else if (I->getType()->isInteger(32)) { + } else if (I->getType()->isIntegerTy(32)) { CReg = X86::ECX; RC = &X86::GR32RegClass; switch (I->getOpcode()) { @@ -975,7 +976,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) { case Instruction::Shl: OpReg = X86::SHL32rCL; OpImm = X86::SHL32ri; break; default: return false; } - } else if (I->getType()->isInteger(64)) { + } else if (I->getType()->isIntegerTy(64)) { CReg = X86::RCX; RC = &X86::GR64RegClass; switch (I->getOpcode()) { @@ -1012,7 +1013,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) { // of X86::CL, emit an EXTRACT_SUBREG to precisely describe what // we're doing here. if (CReg != X86::CL) - BuildMI(MBB, DL, TII.get(TargetInstrInfo::EXTRACT_SUBREG), X86::CL) + BuildMI(MBB, DL, TII.get(TargetOpcode::EXTRACT_SUBREG), X86::CL) .addReg(CReg).addImm(X86::SUBREG_8BIT); unsigned ResultReg = createResultReg(RC); @@ -1153,6 +1154,17 @@ bool X86FastISel::X86VisitIntrinsicCall(IntrinsicInst &I) { // FIXME: Handle more intrinsics. switch (I.getIntrinsicID()) { default: return false; + case Intrinsic::dbg_declare: { + DbgDeclareInst *DI = cast<DbgDeclareInst>(&I); + X86AddressMode AM; + assert(DI->getAddress() && "Null address should be checked earlier!"); + if (!X86SelectAddress(DI->getAddress(), AM)) + return false; + const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); + addFullAddress(BuildMI(MBB, DL, II), AM).addImm(0). 
+ addMetadata(DI->getVariable()); + return true; + } case Intrinsic::trap: { BuildMI(MBB, DL, TII.get(X86::TRAP)); return true; @@ -1236,7 +1248,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) { // fastcc with -tailcallopt is intended to provide a guaranteed // tail call optimization. Fastisel doesn't know how to do that. - if (CC == CallingConv::Fast && PerformTailCallOpt) + if (CC == CallingConv::Fast && GuaranteedTailCallOpt) return false; // Let SDISel handle vararg functions. diff --git a/lib/Target/X86/X86FixupKinds.h b/lib/Target/X86/X86FixupKinds.h new file mode 100644 index 0000000..c8dac3c --- /dev/null +++ b/lib/Target/X86/X86FixupKinds.h @@ -0,0 +1,25 @@ +//===-- X86/X86FixupKinds.h - X86 Specific Fixup Entries --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_X86_X86FIXUPKINDS_H +#define LLVM_X86_X86FIXUPKINDS_H + +#include "llvm/MC/MCFixup.h" + +namespace llvm { +namespace X86 { +enum Fixups { + reloc_pcrel_4byte = FirstTargetFixupKind, // 32-bit pcrel, e.g. a branch. + reloc_pcrel_1byte, // 8-bit pcrel, e.g. branch_1 + reloc_riprel_4byte // 32-bit rip-relative +}; +} +} + +#endif diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index 503ac14..6d6fe77 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -235,7 +235,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { unsigned Flags = MI->getDesc().TSFlags; unsigned FPInstClass = Flags & X86II::FPTypeMask; - if (MI->getOpcode() == TargetInstrInfo::INLINEASM) + if (MI->isInlineAsm()) FPInstClass = X86II::SpecialFP; if (FPInstClass == X86II::NotFP) @@ -1083,7 +1083,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { } } break; - case TargetInstrInfo::INLINEASM: { + case TargetOpcode::INLINEASM: { // The inline asm MachineInstr currently only *uses* FP registers for the // 'f' constraint. These should be turned into the current ST(x) register // in the machine instr. 
Also, any kills should be explicitly popped after diff --git a/lib/Target/X86/X86FloatingPointRegKill.cpp b/lib/Target/X86/X86FloatingPointRegKill.cpp index 34a0045..6a117dd 100644 --- a/lib/Target/X86/X86FloatingPointRegKill.cpp +++ b/lib/Target/X86/X86FloatingPointRegKill.cpp @@ -118,7 +118,7 @@ bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) { for (BasicBlock::const_iterator II = SI->begin(); (PN = dyn_cast<PHINode>(II)); ++II) { if (PN->getType()==Type::getX86_FP80Ty(LLVMBB->getContext()) || - (!Subtarget.hasSSE1() && PN->getType()->isFloatingPoint()) || + (!Subtarget.hasSSE1() && PN->getType()->isFloatingPointTy()) || (!Subtarget.hasSSE2() && PN->getType()==Type::getDoubleTy(LLVMBB->getContext()))) { ContainsFPCode = true; diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 91e0483..7b349f6 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -183,8 +183,9 @@ namespace { virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF); - virtual - bool IsLegalAndProfitableToFold(SDNode *N, SDNode *U, SDNode *Root) const; + virtual bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const; + + virtual bool IsLegalToFold(SDValue N, SDNode *U, SDNode *Root) const; // Include the pieces autogenerated from the target description. #include "X86GenDAGISel.inc" @@ -303,11 +304,18 @@ namespace { } -bool X86DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, - SDNode *Root) const { +bool +X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const { if (OptLevel == CodeGenOpt::None) return false; - if (U == Root) + if (!N.hasOneUse()) + return false; + + if (N.getOpcode() != ISD::LOAD) + return true; + + // If N is a load, do additional profitability checks. + if (U == Root) { switch (U->getOpcode()) { default: break; case X86ISD::ADD: @@ -354,9 +362,17 @@ bool X86DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, } } } + } + + return true; +} + + +bool X86DAGToDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root) const { + if (OptLevel == CodeGenOpt::None) return false; // Proceed to 'generic' cycle finder code - return SelectionDAGISel::IsLegalAndProfitableToFold(N, U, Root); + return SelectionDAGISel::IsLegalToFold(N, U, Root); } /// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand @@ -652,9 +668,10 @@ void X86DAGToDAGISel::PreprocessForFPConvert() { // FIXME: optimize the case where the src/dest is a load or store? SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, N->getOperand(0), - MemTmp, NULL, 0, MemVT); + MemTmp, NULL, 0, MemVT, + false, false, 0); SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp, - NULL, 0, MemVT); + NULL, 0, MemVT, false, false, 0); // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the // extload we created. 
This will cause general havoc on the dag because @@ -1310,8 +1327,8 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Op, SDValue Pred, InChain = N.getOperand(0).getValue(1); if (ISD::isNON_EXTLoad(InChain.getNode()) && InChain.getValue(0).hasOneUse() && - N.hasOneUse() && - IsLegalAndProfitableToFold(N.getNode(), Pred.getNode(), Op)) { + IsProfitableToFold(N, Pred.getNode(), Op) && + IsLegalToFold(N, Pred.getNode(), Op)) { LoadSDNode *LD = cast<LoadSDNode>(InChain); if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) return false; @@ -1435,8 +1452,8 @@ bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, SDValue &Index, SDValue &Disp, SDValue &Segment) { if (ISD::isNON_EXTLoad(N.getNode()) && - N.hasOneUse() && - IsLegalAndProfitableToFold(N.getNode(), P, P)) + IsProfitableToFold(N, P, P) && + IsLegalToFold(N, P, P)) return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment); return false; } @@ -1606,7 +1623,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { } DebugLoc dl = Node->getDebugLoc(); - SDValue Undef = SDValue(CurDAG->getMachineNode(TargetInstrInfo::IMPLICIT_DEF, + SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, NVT), 0); MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = cast<MemSDNode>(Node)->getMemOperand(); @@ -1652,8 +1669,8 @@ static bool HasNoSignedComparisonUses(SDNode *N) { case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr: case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm: case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm: - case X86::JA: case X86::JAE: case X86::JB: case X86::JBE: - case X86::JE: case X86::JNE: case X86::JP: case X86::JNP: + case X86::JA_4: case X86::JAE_4: case X86::JB_4: case X86::JBE_4: + case X86::JE_4: case X86::JNE_4: case X86::JP_4: case X86::JNP_4: case X86::CMOVA16rr: case X86::CMOVA16rm: case X86::CMOVA32rr: case X86::CMOVA32rm: case X86::CMOVA64rr: case X86::CMOVA64rm: diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 11e07df..9974d8c 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -12,9 +12,11 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "x86-isel" #include "X86.h" #include "X86InstrBuilder.h" #include "X86ISelLowering.h" +#include "X86MCTargetExpr.h" #include "X86TargetMachine.h" #include "X86TargetObjectFile.h" #include "llvm/CallingConv.h" @@ -26,24 +28,30 @@ #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" #include "llvm/LLVMContext.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/VectorExtras.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetOptions.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/VectorExtras.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include
"llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +STATISTIC(NumTailCalls, "Number of tail calls"); + static cl::opt<bool> DisableMMX("disable-mmx", cl::Hidden, cl::desc("Disable use of MMX")); @@ -67,13 +75,14 @@ static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) { return new X8664_MachoTargetObjectFile(); return new X8632_MachoTargetObjectFile(); case X86Subtarget::isELF: - return new TargetLoweringObjectFileELF(); + if (TM.getSubtarget<X86Subtarget>().is64Bit()) + return new X8664_ELFTargetObjectFile(TM); + return new X8632_ELFTargetObjectFile(TM); case X86Subtarget::isMingw: case X86Subtarget::isCygwin: case X86Subtarget::isWindows: return new TargetLoweringObjectFileCOFF(); } - } X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) @@ -747,6 +756,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i8, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i16, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom); + // Custom lower build_vector, vector_shuffle, and extract_vector_elt. for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v2i64; ++i) { EVT VT = (MVT::SimpleValueType)i; @@ -987,19 +1002,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) computeRegisterProperties(); - // Divide and reminder operations have no vector equivalent and can - // trap. Do a custom widening for these operations in which we never - // generate more divides/remainder than the original vector width. - for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; - VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) { - if (!isTypeLegal((MVT::SimpleValueType)VT)) { - setOperationAction(ISD::SDIV, (MVT::SimpleValueType) VT, Custom); - setOperationAction(ISD::UDIV, (MVT::SimpleValueType) VT, Custom); - setOperationAction(ISD::SREM, (MVT::SimpleValueType) VT, Custom); - setOperationAction(ISD::UREM, (MVT::SimpleValueType) VT, Custom); - } - } - // FIXME: These should be based on subtarget info. Plus, the values should // be smaller when we are in optimizing for size mode. maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores @@ -1084,12 +1086,46 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align, return MVT::i32; } +/// getJumpTableEncoding - Return the entry encoding for a jump table in the +/// current function. The returned value is a member of the +/// MachineJumpTableInfo::JTEntryKind enum. +unsigned X86TargetLowering::getJumpTableEncoding() const { + // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF + // symbol. + if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && + Subtarget->isPICStyleGOT()) + return MachineJumpTableInfo::EK_Custom32; + + // Otherwise, use the normal jump table encoding heuristics. + return TargetLowering::getJumpTableEncoding(); +} + +/// getPICBaseSymbol - Return the X86-32 PIC base. 
+MCSymbol * +X86TargetLowering::getPICBaseSymbol(const MachineFunction *MF, + MCContext &Ctx) const { + const MCAsmInfo &MAI = *getTargetMachine().getMCAsmInfo(); + return Ctx.GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix())+ + Twine(MF->getFunctionNumber())+"$pb"); +} + + +const MCExpr * +X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, + const MachineBasicBlock *MBB, + unsigned uid,MCContext &Ctx) const{ + assert(getTargetMachine().getRelocationModel() == Reloc::PIC_ && + Subtarget->isPICStyleGOT()); + // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF + // entries. + return X86MCTargetExpr::Create(MBB->getSymbol(Ctx), + X86MCTargetExpr::GOTOFF, Ctx); +} + /// getPICJumpTableRelocBase - Returns relocation base for the given PIC /// jumptable. SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table, - SelectionDAG &DAG) const { - if (usesGlobalOffsetTable()) - return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy()); + SelectionDAG &DAG) const { if (!Subtarget->is64Bit()) // This doesn't have DebugLoc associated with it, but is not really the // same as a Register. @@ -1098,6 +1134,20 @@ SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table, return Table; } +/// getPICJumpTableRelocBaseExpr - This returns the relocation base for the +/// given PIC jumptable, the same as getPICJumpTableRelocBase, but as an +/// MCExpr. +const MCExpr *X86TargetLowering:: +getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, + MCContext &Ctx) const { + // X86-64 uses RIP relative addressing based on the jump table label. + if (Subtarget->isPICStyleRIPRel()) + return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx); + + // Otherwise, the reference is relative to the PIC base. + return MCSymbolRefExpr::Create(getPICBaseSymbol(MF, Ctx), Ctx); +} + /// getFunctionAlignment - Return the Log2 alignment of this function. unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const { return F->hasFnAttr(Attribute::OptimizeForSize) ? 0 : 4; @@ -1131,13 +1181,11 @@ X86TargetLowering::LowerReturn(SDValue Chain, RVLocs, *DAG.getContext()); CCInfo.AnalyzeReturn(Outs, RetCC_X86); - // If this is the first return lowered for this function, add the regs to the - // liveout set for the function. - if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { - for (unsigned i = 0; i != RVLocs.size(); ++i) - if (RVLocs[i].isRegLoc()) - DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); - } + // Add the regs to the liveout set for the function. + MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); + for (unsigned i = 0; i != RVLocs.size(); ++i) + if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg())) + MRI.addLiveOut(RVLocs[i].getLocReg()); SDValue Flag; @@ -1190,7 +1238,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); unsigned Reg = FuncInfo->getSRetReturnReg(); if (!Reg) { - Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64)); + Reg = MRI.createVirtualRegister(getRegClassFor(MVT::i64)); FuncInfo->setSRetReturnReg(Reg); } SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy()); @@ -1199,7 +1247,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, Flag = Chain.getValue(1); // RAX now acts like a return value. - MF.getRegInfo().addLiveOut(X86::RAX); + MRI.addLiveOut(X86::RAX); } RetOps[0] = Chain; // Update chain.
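The EK_Custom32/@GOTOFF machinery added above boils down to simple address arithmetic: each 32-bit jump-table entry stores a basic-block address minus the PIC base (the GOT), and the dispatch sequence adds the base back before jumping. A rough sketch of what the emitted code computes at run time; the names here are illustrative, not LLVM API:

#include <cstdint>

using BlockFn = void (*)();

// Dispatch through a 32-bit @GOTOFF jump table: Table[Index] holds
// (BlockAddress - PicBase), so the target is recovered by re-adding PicBase
// (held in EBX in the GOT PIC style the diff targets).
void dispatch(const int32_t *Table, unsigned Index, uintptr_t PicBase) {
  uintptr_t Target = PicBase + (intptr_t)Table[Index];
  reinterpret_cast<BlockFn>(Target)();  // effectively: jmp *Target
}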
@@ -1329,7 +1377,7 @@ bool X86TargetLowering::IsCalleePop(bool IsVarArg, CallingConv::ID CallingConv){ case CallingConv::X86_FastCall: return !Subtarget->is64Bit(); case CallingConv::Fast: - return PerformTailCallOpt; + return GuaranteedTailCallOpt; } } @@ -1351,18 +1399,6 @@ CCAssignFn *X86TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const { return CC_X86_32_C; } -/// NameDecorationForCallConv - Selects the appropriate decoration to -/// apply to a MachineFunction containing a given calling convention. -NameDecorationStyle -X86TargetLowering::NameDecorationForCallConv(CallingConv::ID CallConv) { - if (CallConv == CallingConv::X86_FastCall) - return FastCall; - else if (CallConv == CallingConv::X86_StdCall) - return StdCall; - return None; -} - - /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified /// by "Src" to address "Dst" with size and alignment information specified by /// the specific parameter attribute. The copy will be passed as a byval @@ -1376,6 +1412,12 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, /*AlwaysInline=*/true, NULL, 0, NULL, 0); } +/// FuncIsMadeTailCallSafe - Return true if the function is being made into +/// a tailcall target by changing its ABI. +static bool FuncIsMadeTailCallSafe(CallingConv::ID CC) { + return GuaranteedTailCallOpt && CC == CallingConv::Fast; +} + SDValue X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv, @@ -1384,10 +1426,9 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, const CCValAssign &VA, MachineFrameInfo *MFI, unsigned i) { - // Create the nodes corresponding to a load from this parameter slot. ISD::ArgFlagsTy Flags = Ins[i].Flags; - bool AlwaysUseMutable = (CallConv==CallingConv::Fast) && PerformTailCallOpt; + bool AlwaysUseMutable = FuncIsMadeTailCallSafe(CallConv); bool isImmutable = !AlwaysUseMutable && !Flags.isByVal(); EVT ValVT; @@ -1402,13 +1443,18 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, // changed with more analysis. // In case of tail call optimization mark all arguments mutable. Since they // could be overwritten by lowering of arguments in case of a tail call. - int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8, - VA.getLocMemOffset(), isImmutable, false); - SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); - if (Flags.isByVal()) - return FIN; - return DAG.getLoad(ValVT, dl, Chain, FIN, - PseudoSourceValue::getFixedStack(FI), 0); + if (Flags.isByVal()) { + int FI = MFI->CreateFixedObject(Flags.getByValSize(), + VA.getLocMemOffset(), isImmutable, false); + return DAG.getFrameIndex(FI, getPointerTy()); + } else { + int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8, + VA.getLocMemOffset(), isImmutable, false); + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + return DAG.getLoad(ValVT, dl, Chain, FIN, + PseudoSourceValue::getFixedStack(FI), 0, + false, false, 0); + } } SDValue @@ -1429,9 +1475,6 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, Fn->getName() == "main") FuncInfo->setForceFramePointer(true); - // Decorate the function name. - FuncInfo->setDecorationStyle(NameDecorationForCallConv(CallConv)); - MachineFrameInfo *MFI = MF.getFrameInfo(); bool Is64Bit = Subtarget->is64Bit(); bool IsWin64 = Subtarget->isTargetWin64(); @@ -1504,7 +1547,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, // If value is passed via pointer - do a load. 
if (VA.getLocInfo() == CCValAssign::Indirect) - ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, NULL, 0); + ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, NULL, 0, + false, false, 0); InVals.push_back(ArgValue); } @@ -1524,8 +1568,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, } unsigned StackSize = CCInfo.getNextStackOffset(); - // align stack specially for tail calls - if (PerformTailCallOpt && CallConv == CallingConv::Fast) + // Align stack specially for tail calls. + if (FuncIsMadeTailCallSafe(CallConv)) StackSize = GetAlignedArgumentStackSize(StackSize, DAG); // If the function takes variable number of arguments, make a frame index for @@ -1599,7 +1643,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, PseudoSourceValue::getFixedStack(RegSaveFrameIndex), - Offset); + Offset, false, false, 0); MemOps.push_back(Store); Offset += 8; } @@ -1636,13 +1680,11 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, // Some CCs need callee pop. if (IsCalleePop(isVarArg, CallConv)) { BytesToPopOnReturn = StackSize; // Callee pops everything. - BytesCallerReserves = 0; } else { BytesToPopOnReturn = 0; // Callee pops nothing. // If this is an sret function, the return should pop the hidden pointer. if (!Is64Bit && CallConv != CallingConv::Fast && ArgsAreStructReturn(Ins)) BytesToPopOnReturn = 4; - BytesCallerReserves = StackSize; } if (!Is64Bit) { @@ -1670,27 +1712,23 @@ X86TargetLowering::LowerMemOpCallTo(SDValue Chain, return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl); } return DAG.getStore(Chain, dl, Arg, PtrOff, - PseudoSourceValue::getStack(), LocMemOffset); + PseudoSourceValue::getStack(), LocMemOffset, + false, false, 0); } /// EmitTailCallLoadRetAddr - Emit a load of return address if tail call /// optimization is performed and it is required. SDValue X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG, - SDValue &OutRetAddr, - SDValue Chain, - bool IsTailCall, - bool Is64Bit, - int FPDiff, - DebugLoc dl) { - if (!IsTailCall || FPDiff==0) return Chain; - + SDValue &OutRetAddr, SDValue Chain, + bool IsTailCall, bool Is64Bit, + int FPDiff, DebugLoc dl) { // Adjust the Return address stack slot. EVT VT = getPointerTy(); OutRetAddr = getReturnAddressFrameIndex(DAG); // Load the "old" Return address. - OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, NULL, 0); + OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, NULL, 0, false, false, 0); return SDValue(OutRetAddr.getNode(), 1); } @@ -1705,31 +1743,42 @@ EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF, // Calculate the new stack slot for the return address. int SlotSize = Is64Bit ? 8 : 4; int NewReturnAddrFI = - MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, - true, false); + MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, true,false); EVT VT = Is64Bit ? 
MVT::i64 : MVT::i32; SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT); Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx, - PseudoSourceValue::getFixedStack(NewReturnAddrFI), 0); + PseudoSourceValue::getFixedStack(NewReturnAddrFI), 0, + false, false, 0); return Chain; } SDValue X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, + bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) { - MachineFunction &MF = DAG.getMachineFunction(); bool Is64Bit = Subtarget->is64Bit(); bool IsStructRet = CallIsStructReturn(Outs); + bool IsSibcall = false; + + if (isTailCall) { + // Check if it's really possible to do a tail call. + isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, + Outs, Ins, DAG); + + // Sibcalls are automatically detected tailcalls which do not require + // ABI changes. + if (!GuaranteedTailCallOpt && isTailCall) + IsSibcall = true; + + if (isTailCall) + ++NumTailCalls; + } - assert((!isTailCall || - (CallConv == CallingConv::Fast && PerformTailCallOpt)) && - "IsEligibleForTailCallOptimization missed a case!"); assert(!(isVarArg && CallConv == CallingConv::Fast) && "Var args not supported with calling convention fastcc"); @@ -1741,11 +1790,15 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = CCInfo.getNextStackOffset(); - if (PerformTailCallOpt && CallConv == CallingConv::Fast) + if (IsSibcall) + // This is a sibcall. The memory operands are available in caller's + // own caller's stack. + NumBytes = 0; + else if (GuaranteedTailCallOpt && CallConv == CallingConv::Fast) NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG); int FPDiff = 0; - if (isTailCall) { + if (isTailCall && !IsSibcall) { // Lower arguments at fp - stackoffset + fpdiff. unsigned NumBytesCallerPushed = MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn(); @@ -1757,12 +1810,14 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, MF.getInfo<X86MachineFunctionInfo>()->setTCReturnAddrDelta(FPDiff); } - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); + if (!IsSibcall) + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); SDValue RetAddrFrIdx; // Load return address for tail calls.
- Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall, Is64Bit, - FPDiff, dl); + if (isTailCall && FPDiff) + Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall, + Is64Bit, FPDiff, dl); SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; SmallVector<SDValue, 8> MemOpChains; @@ -1804,7 +1859,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT()); int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); Chain = DAG.getStore(Chain, dl, Arg, SpillSlot, - PseudoSourceValue::getFixedStack(FI), 0); + PseudoSourceValue::getFixedStack(FI), 0, + false, false, 0); Arg = SpillSlot; break; } @@ -1812,15 +1868,12 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (VA.isRegLoc()) { RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); - } else { - if (!isTailCall || (isTailCall && isByVal)) { - assert(VA.isMemLoc()); - if (StackPtr.getNode() == 0) - StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr, getPointerTy()); - - MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, - dl, DAG, VA, Flags)); - } + } else if (!IsSibcall && (!isTailCall || isByVal)) { + assert(VA.isMemLoc()); + if (StackPtr.getNode() == 0) + StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr, getPointerTy()); + MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, + dl, DAG, VA, Flags)); } } @@ -1840,7 +1893,6 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, InFlag = Chain.getValue(1); } - if (Subtarget->isPICStyleGOT()) { // ELF / PIC requires GOT in the EBX register before function calls via PLT // GOT pointer. @@ -1910,9 +1962,11 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, int FI = 0; // Do not flag preceding copytoreg stuff together with the following stuff. InFlag = SDValue(); - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { - CCValAssign &VA = ArgLocs[i]; - if (!VA.isRegLoc()) { + if (GuaranteedTailCallOpt) { + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + if (VA.isRegLoc()) + continue; assert(VA.isMemLoc()); SDValue Arg = Outs[i].Val; ISD::ArgFlagsTy Flags = Outs[i].Flags; @@ -1937,7 +1991,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Store relative to framepointer. MemOpChains2.push_back( DAG.getStore(ArgChain, dl, Arg, FIN, - PseudoSourceValue::getFixedStack(FI), 0)); + PseudoSourceValue::getFixedStack(FI), 0, + false, false, 0)); } } } @@ -2020,21 +2075,22 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, } if (isTailCall && !WasGlobalOrExternal) { - unsigned Opc = Is64Bit ? X86::R11 : X86::EAX; - + // Force the address into a (call preserved) caller-saved register since + // tailcall must happen after callee-saved registers are popped. + // FIXME: Give it a special register class that contains caller-saved + // registers instead? + unsigned TCReg = Is64Bit ? X86::R11 : X86::EAX; Chain = DAG.getCopyToReg(Chain, dl, - DAG.getRegister(Opc, getPointerTy()), + DAG.getRegister(TCReg, getPointerTy()), Callee,InFlag); - Callee = DAG.getRegister(Opc, getPointerTy()); - // Add register as live out. - MF.getRegInfo().addLiveOut(Opc); + Callee = DAG.getRegister(TCReg, getPointerTy()); } // Returns a chain & a flag for retval copy to use.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); SmallVector<SDValue, 8> Ops; - if (isTailCall) { + if (!IsSibcall && isTailCall) { Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), DAG.getIntPtrConstant(0, true), InFlag); InFlag = Chain.getValue(1); @@ -2095,7 +2151,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (IsCalleePop(isVarArg, CallConv)) NumBytesForCalleeToPush = NumBytes; // Callee pops everything else if (!Is64Bit && CallConv != CallingConv::Fast && IsStructRet) - // If this is is a call to a struct-return function, the callee + // If this is a call to a struct-return function, the callee // pops the hidden struct pointer, so we have to push it back. // This is common for Darwin/X86, Linux & Mingw32 targets. NumBytesForCalleeToPush = 4; @@ -2103,12 +2159,14 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, NumBytesForCalleeToPush = 0; // Callee pops nothing. // Returns a flag for retval copy to use. - Chain = DAG.getCALLSEQ_END(Chain, - DAG.getIntPtrConstant(NumBytes, true), - DAG.getIntPtrConstant(NumBytesForCalleeToPush, - true), - InFlag); - InFlag = Chain.getValue(1); + if (!IsSibcall) { + Chain = DAG.getCALLSEQ_END(Chain, + DAG.getIntPtrConstant(NumBytes, true), + DAG.getIntPtrConstant(NumBytesForCalleeToPush, + true), + InFlag); + InFlag = Chain.getValue(1); + } // Handle result values, copying them out of physregs into vregs that we // return. @@ -2170,6 +2228,50 @@ unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize, return Offset; } +/// MatchingStackOffset - Return true if the given stack call argument is +/// already available in the same position (relatively) of the caller's +/// incoming argument stack. +static +bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, + MachineFrameInfo *MFI, const MachineRegisterInfo *MRI, + const X86InstrInfo *TII) { + int FI; + if (Arg.getOpcode() == ISD::CopyFromReg) { + unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg(); + if (!VR || TargetRegisterInfo::isPhysicalRegister(VR)) + return false; + MachineInstr *Def = MRI->getVRegDef(VR); + if (!Def) + return false; + if (!Flags.isByVal()) { + if (!TII->isLoadFromStackSlot(Def, FI)) + return false; + } else { + unsigned Opcode = Def->getOpcode(); + if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r) && + Def->getOperand(1).isFI()) { + FI = Def->getOperand(1).getIndex(); + if (MFI->getObjectSize(FI) != Flags.getByValSize()) + return false; + } else + return false; + } + } else { + LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg); + if (!Ld) + return false; + SDValue Ptr = Ld->getBasePtr(); + FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr); + if (!FINode) + return false; + FI = FINode->getIndex(); + } + + if (!MFI->isFixedObjectIndex(FI)) + return false; + return Offset == MFI->getObjectOffset(FI); +} + /// IsEligibleForTailCallOptimization - Check whether the call is eligible /// for tail call optimization. Targets which want to do tail call /// optimization should implement this function. 
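MatchingStackOffset above is the load-bearing check for sibcalls: an outgoing stack argument may be left untouched only when it provably is the caller's own incoming argument, already sitting in a fixed frame object at exactly the offset the callee expects. Stripped of the MachineInstr plumbing, the predicate reduces to something like the following sketch (the struct is a simplified stand-in for the MachineFrameInfo queries in the diff):

#include <cstdint>

struct FrameObject {
  bool IsFixed;      // Fixed objects are the caller's incoming-argument slots.
  int64_t Offset;    // Frame offset of the slot.
};

// Recognized producers in the real code: a load from a stack slot, or for
// byval arguments an LEA32r/LEA64r of a frame index of matching size. Once
// the frame index is found, the remaining test is just this:
static bool matchingStackOffset(const FrameObject &ArgSlot, int64_t WantedOffset) {
  return ArgSlot.IsFixed && ArgSlot.Offset == WantedOffset;
}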
@@ -2177,23 +2279,79 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG& DAG) const { - MachineFunction &MF = DAG.getMachineFunction(); - CallingConv::ID CallerCC = MF.getFunction()->getCallingConv(); - return CalleeCC == CallingConv::Fast && CallerCC == CalleeCC; + if (CalleeCC != CallingConv::Fast && + CalleeCC != CallingConv::C) + return false; + + // If -tailcallopt is specified, make fastcc functions tail-callable. + const Function *CallerF = DAG.getMachineFunction().getFunction(); + if (GuaranteedTailCallOpt) { + if (CalleeCC == CallingConv::Fast && + CallerF->getCallingConv() == CalleeCC) + return true; + return false; + } + + // Look for obvious safe cases to perform tail call optimization that do not + // require ABI changes. This is what gcc calls sibcall. + + // Do not tail call optimize vararg calls for now. + if (isVarArg) + return false; + + // If the callee takes no arguments then go on to check the results of the + // call. + if (!Outs.empty()) { + // Check if stack adjustment is needed. For now, do not do this if any + // argument is passed on the stack. + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CalleeCC, isVarArg, getTargetMachine(), + ArgLocs, *DAG.getContext()); + CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC)); + if (CCInfo.getNextStackOffset()) { + MachineFunction &MF = DAG.getMachineFunction(); + if (MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) + return false; + if (Subtarget->isTargetWin64()) + // Win64 ABI has additional complications. + return false; + + // Check if the arguments are already laid out in the right way as + // the caller's fixed stack objects. + MachineFrameInfo *MFI = MF.getFrameInfo(); + const MachineRegisterInfo *MRI = &MF.getRegInfo(); + const X86InstrInfo *TII = + ((X86TargetMachine&)getTargetMachine()).getInstrInfo(); + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + EVT RegVT = VA.getLocVT(); + SDValue Arg = Outs[i].Val; + ISD::ArgFlagsTy Flags = Outs[i].Flags; + if (VA.getLocInfo() == CCValAssign::Indirect) + return false; + if (!VA.isRegLoc()) { + if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, + MFI, MRI, TII)) + return false; + } + } + } + } + + return true; } FastISel * -X86TargetLowering::createFastISel(MachineFunction &mf, - MachineModuleInfo *mmo, - DwarfWriter *dw, - DenseMap<const Value *, unsigned> &vm, - DenseMap<const BasicBlock *, - MachineBasicBlock *> &bm, - DenseMap<const AllocaInst *, int> &am +X86TargetLowering::createFastISel(MachineFunction &mf, MachineModuleInfo *mmo, + DwarfWriter *dw, + DenseMap<const Value *, unsigned> &vm, + DenseMap<const BasicBlock*, MachineBasicBlock*> &bm, + DenseMap<const AllocaInst *, int> &am #ifndef NDEBUG - , SmallSet<Instruction*, 8> &cil + , SmallSet<Instruction*, 8> &cil #endif ) { return X86::createFastISel(mf, mmo, dw, vm, bm, am @@ -3413,7 +3571,8 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl, int EltNo = (Offset - StartOffset) >> 2; int Mask[4] = { EltNo, EltNo, EltNo, EltNo }; EVT VT = (PVT == MVT::i32) ? MVT::v4i32 : MVT::v4f32; - SDValue V1 = DAG.getLoad(VT, dl, Chain, Ptr,LD->getSrcValue(),0); + SDValue V1 = DAG.getLoad(VT, dl, Chain, Ptr,LD->getSrcValue(),0, + false, false, 0); // Canonicalize it to a v4i32 shuffle.
V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, V1); return DAG.getNode(ISD::BIT_CONVERT, dl, VT, @@ -3686,6 +3845,33 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { return SDValue(); } +SDValue +X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { + // We support concatenating two MMX registers and placing them in an MMX + // register. This is better than doing a stack-based conversion. + DebugLoc dl = Op.getDebugLoc(); + EVT ResVT = Op.getValueType(); + assert(Op.getNumOperands() == 2); + assert(ResVT == MVT::v2i64 || ResVT == MVT::v4i32 || + ResVT == MVT::v8i16 || ResVT == MVT::v16i8); + int Mask[2]; + SDValue InVec = DAG.getNode(ISD::BIT_CONVERT,dl, MVT::v1i64, Op.getOperand(0)); + SDValue VecOp = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec); + InVec = Op.getOperand(1); + if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) { + unsigned NumElts = ResVT.getVectorNumElements(); + VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, ResVT, VecOp); + VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ResVT, VecOp, + InVec.getOperand(0), DAG.getIntPtrConstant(NumElts/2+1)); + } else { + InVec = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v1i64, InVec); + SDValue VecOp2 = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec); + Mask[0] = 0; Mask[1] = 2; + VecOp = DAG.getVectorShuffle(MVT::v2i64, dl, VecOp, VecOp2, Mask); + } + return DAG.getNode(ISD::BIT_CONVERT, dl, ResVT, VecOp); +} + // v8i16 shuffles - Prefer shuffles in the following order: // 1. [all] pshuflw, pshufhw, optional move // 2. [ssse3] 1 x pshufb @@ -4885,7 +5071,7 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl, // load. if (isGlobalStubReference(OpFlags)) Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result, - PseudoSourceValue::getGOT(), 0); + PseudoSourceValue::getGOT(), 0, false, false, 0); // If there was a non-zero offset that we didn't fold, create an explicit // addition for it. @@ -4965,7 +5151,7 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, MVT::i32)); SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Base, - NULL, 0); + NULL, 0, false, false, 0); unsigned char OperandFlags = 0; // Most TLS accesses are not RIP relative, even on x86-64. One exception is @@ -4990,7 +5176,7 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, if (model == TLSModel::InitialExec) Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset, - PseudoSourceValue::getGOT(), 0); + PseudoSourceValue::getGOT(), 0, false, false, 0); // The address of the thread local variable is the sum of the thread // pointer and the offset of the variable.
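The LowerCONCAT_VECTORS path above first widens each 64-bit MMX half into a 128-bit register (X86ISD::MOVQ2DQ zero-extends into the low quadword of an XMM register), then interleaves the two low quadwords with the v2i64 shuffle mask {0, 2}. In scalar terms the mask does no more than the following sketch shows (element semantics only, not the intrinsics):

#include <cstdint>

// A v2i64 shuffle of sources A and B reads lanes 0-1 from A and lanes 2-3
// from B. Mask {0, 2} therefore takes A[0] and B[0], gluing the two 64-bit
// halves into one 128-bit value.
static void concatHalves(const uint64_t A[2], const uint64_t B[2], uint64_t Out[2]) {
  const uint64_t Lanes[4] = { A[0], A[1], B[0], B[1] };
  const int Mask[2] = { 0, 2 };
  for (int i = 0; i != 2; ++i)
    Out[i] = Lanes[Mask[i]];
}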
@@ -5107,7 +5293,8 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), StackSlot, - PseudoSourceValue::getFixedStack(SSFI), 0); + PseudoSourceValue::getFixedStack(SSFI), 0, + false, false, 0); return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG); } @@ -5142,7 +5329,8 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, }; Chain = DAG.getNode(X86ISD::FST, dl, Tys, Ops, array_lengthof(Ops)); Result = DAG.getLoad(Op.getValueType(), dl, Chain, StackSlot, - PseudoSourceValue::getFixedStack(SSFI), 0); + PseudoSourceValue::getFixedStack(SSFI), 0, + false, false, 0); } return Result; @@ -5215,12 +5403,12 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) { SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32, XR1, XR2); SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0, PseudoSourceValue::getConstantPool(), 0, - false, 16); + false, false, 16); SDValue Unpck2 = getUnpackl(DAG, dl, MVT::v4i32, Unpck1, CLod0); SDValue XR2F = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Unpck2); SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1, PseudoSourceValue::getConstantPool(), 0, - false, 16); + false, false, 16); SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1); // Add the halves; easiest way is to swap them into another reg first. @@ -5307,9 +5495,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) { SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackSlot, WordOff); SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), - StackSlot, NULL, 0); + StackSlot, NULL, 0, false, false, 0); SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, MVT::i32), - OffsetSlot, NULL, 0); + OffsetSlot, NULL, 0, false, false, 0); return BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG); } @@ -5357,7 +5545,8 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) { if (isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) { assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!"); Chain = DAG.getStore(Chain, dl, Value, StackSlot, - PseudoSourceValue::getFixedStack(SSFI), 0); + PseudoSourceValue::getFixedStack(SSFI), 0, + false, false, 0); SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other); SDValue Ops[] = { Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType()) @@ -5391,7 +5580,7 @@ SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) { // Load the result. return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(), - FIST, StackSlot, NULL, 0); + FIST, StackSlot, NULL, 0, false, false, 0); } SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) { @@ -5401,7 +5590,7 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) { // Load the result. 
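BuildFILD above leans on the x87 unit because these conversions have no direct register-to-register form: the integer is spilled to a stack slot, FILD converts it, and when the result must live in an SSE register it is stored back (X86ISD::FST) and reloaded. Roughly what the lowering produces for a 64-bit source on 32-bit x86 (the assembly in the comments is an illustration, not emitted verbatim):

// double sint64_to_double(long long x):
//   movl  ..., -8(%esp)      ; spill the i64 to the stack slot
//   fildll -8(%esp)          ; x87 integer load + convert
//   fstpl -16(%esp)          ; X86ISD::FST back to memory...
//   movsd -16(%esp), %xmm0   ; ...and reload into an SSE register
double sint64_to_double(long long x) {
  return (double)x;           // the C form of the node being lowered
}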
return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(), - FIST, StackSlot, NULL, 0); + FIST, StackSlot, NULL, 0, false, false, 0); } SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) { @@ -5426,8 +5615,8 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) { Constant *C = ConstantVector::get(CV); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, - false, 16); + PseudoSourceValue::getConstantPool(), 0, + false, false, 16); return DAG.getNode(X86ISD::FAND, dl, VT, Op.getOperand(0), Mask); } @@ -5453,8 +5642,8 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) { Constant *C = ConstantVector::get(CV); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, - false, 16); + PseudoSourceValue::getConstantPool(), 0, + false, false, 16); if (VT.isVector()) { return DAG.getNode(ISD::BIT_CONVERT, dl, VT, DAG.getNode(ISD::XOR, dl, MVT::v2i64, @@ -5502,8 +5691,8 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { Constant *C = ConstantVector::get(CV); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Mask1 = DAG.getLoad(SrcVT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, - false, 16); + PseudoSourceValue::getConstantPool(), 0, + false, false, 16); SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, SrcVT, Op1, Mask1); // Shift sign bit right or left if the two operands have different types. @@ -5531,8 +5720,8 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { C = ConstantVector::get(CV); CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Mask2 = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, - false, 16); + PseudoSourceValue::getConstantPool(), 0, + false, false, 16); SDValue Val = DAG.getNode(X86ISD::FAND, dl, VT, Op0, Mask2); // Or the value with the sign bit. @@ -5919,6 +6108,29 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) { Cond = NewCond; } + // (select (x == 0), -1, 0) -> (sign_bit (x - 1)) + SDValue Op1 = Op.getOperand(1); + SDValue Op2 = Op.getOperand(2); + if (Cond.getOpcode() == X86ISD::SETCC && + cast<ConstantSDNode>(Cond.getOperand(0))->getZExtValue() == X86::COND_E) { + SDValue Cmp = Cond.getOperand(1); + if (Cmp.getOpcode() == X86ISD::CMP) { + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(Op1); + ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(Op2); + ConstantSDNode *RHSC = + dyn_cast<ConstantSDNode>(Cmp.getOperand(1).getNode()); + if (N1C && N1C->isAllOnesValue() && + N2C && N2C->isNullValue() && + RHSC && RHSC->isNullValue()) { + SDValue CmpOp0 = Cmp.getOperand(0); + Cmp = DAG.getNode(X86ISD::CMP, dl, CmpOp0.getValueType(), + CmpOp0, DAG.getConstant(1, CmpOp0.getValueType())); + return DAG.getNode(X86ISD::SETCC_CARRY, dl, Op.getValueType(), + DAG.getConstant(X86::COND_B, MVT::i8), Cmp); + } + } + } + // Look past (and (setcc_carry (cmp ...)), 1). if (Cond.getOpcode() == ISD::AND && Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) { @@ -5971,10 +6183,10 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) { Cond = EmitTest(Cond, X86::COND_NE, DAG); } - SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Flag); // X86ISD::CMOV means set the result (which is operand 1) to the RHS if // condition is true.
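The new (select (x == 0), -1, 0) combine is the classic carry trick: unsigned x - 1 borrows exactly when x == 0, and SETCC_CARRY materializes as sbb reg,reg, that is, 0 - CF. A small standalone self-check of the identity (plain C++, not LLVM code):

#include <cassert>
#include <cstdint>
static int32_t select_eq0(uint32_t x) {   // what "cmp $1, x ; sbb r, r" computes
  uint32_t carry = (x < 1u);              // CF produced by CMP x, 1
  return (int32_t)(0u - carry);           // sbb r, r  =>  -CF
}
int main() {
  for (uint32_t x : {0u, 1u, 2u, 0xFFFFFFFFu})
    assert(select_eq0(x) == ((x == 0) ? -1 : 0));
}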
- SDValue Ops[] = { Op.getOperand(2), Op.getOperand(1), CC, Cond }; + SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Flag); + SDValue Ops[] = { Op2, Op1, CC, Cond }; return DAG.getNode(X86ISD::CMOV, dl, VTs, Ops, array_lengthof(Ops)); } @@ -6417,7 +6629,8 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); - return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0); + return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0, + false, false, 0); } // __va_list_tag: @@ -6429,8 +6642,8 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { SDValue FIN = Op.getOperand(1); // Store gp_offset SDValue Store = DAG.getStore(Op.getOperand(0), dl, - DAG.getConstant(VarArgsGPOffset, MVT::i32), - FIN, SV, 0); + DAG.getConstant(VarArgsGPOffset, MVT::i32), + FIN, SV, 0, false, false, 0); MemOps.push_back(Store); // Store fp_offset @@ -6438,21 +6651,23 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { FIN, DAG.getIntPtrConstant(4)); Store = DAG.getStore(Op.getOperand(0), dl, DAG.getConstant(VarArgsFPOffset, MVT::i32), - FIN, SV, 0); + FIN, SV, 0, false, false, 0); MemOps.push_back(Store); // Store ptr to overflow_arg_area FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, DAG.getIntPtrConstant(4)); SDValue OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); - Store = DAG.getStore(Op.getOperand(0), dl, OVFIN, FIN, SV, 0); + Store = DAG.getStore(Op.getOperand(0), dl, OVFIN, FIN, SV, 0, + false, false, 0); MemOps.push_back(Store); // Store ptr to reg_save_area. FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, DAG.getIntPtrConstant(8)); SDValue RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy()); - Store = DAG.getStore(Op.getOperand(0), dl, RSFIN, FIN, SV, 0); + Store = DAG.getStore(Op.getOperand(0), dl, RSFIN, FIN, SV, 0, + false, false, 0); MemOps.push_back(Store); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOps[0], MemOps.size()); @@ -6738,13 +6953,13 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), DAG.getNode(ISD::ADD, dl, getPointerTy(), FrameAddr, Offset), - NULL, 0); + NULL, 0, false, false, 0); } // Just load the return address. SDValue RetAddrFI = getReturnAddressFrameIndex(DAG); return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - RetAddrFI, NULL, 0); + RetAddrFI, NULL, 0, false, false, 0); } SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { @@ -6756,7 +6971,8 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { unsigned FrameReg = Subtarget->is64Bit() ? 
X86::RBP : X86::EBP; SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); while (Depth--) - FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0); + FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0, + false, false, 0); return FrameAddr; } @@ -6780,7 +6996,7 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) SDValue StoreAddr = DAG.getNode(ISD::SUB, dl, getPointerTy(), Frame, DAG.getIntPtrConstant(-TD->getPointerSize())); StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StoreAddr, Offset); - Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, NULL, 0); + Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, NULL, 0, false, false, 0); Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr); MF.getRegInfo().addLiveOut(StoreAddrReg); @@ -6799,16 +7015,12 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); - const X86InstrInfo *TII = - ((X86TargetMachine&)getTargetMachine()).getInstrInfo(); - if (Subtarget->is64Bit()) { SDValue OutChains[6]; // Large code-model. - - const unsigned char JMP64r = TII->getBaseOpcodeFor(X86::JMP64r); - const unsigned char MOV64ri = TII->getBaseOpcodeFor(X86::MOV64ri); + const unsigned char JMP64r = 0xFF; // 64-bit jmp through register opcode. + const unsigned char MOV64ri = 0xB8; // X86::MOV64ri opcode. const unsigned char N86R10 = RegInfo->getX86RegNum(X86::R10); const unsigned char N86R11 = RegInfo->getX86RegNum(X86::R11); @@ -6819,11 +7031,12 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, unsigned OpCode = ((MOV64ri | N86R11) << 8) | REX_WB; // movabsq r11 SDValue Addr = Trmp; OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16), - Addr, TrmpAddr, 0); + Addr, TrmpAddr, 0, false, false, 0); Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(2, MVT::i64)); - OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr, TrmpAddr, 2, false, 2); + OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr, TrmpAddr, 2, + false, false, 2); // Load the 'nest' parameter value into R10. // R10 is specified in X86CallingConv.td @@ -6831,24 +7044,25 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(10, MVT::i64)); OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16), - Addr, TrmpAddr, 10); + Addr, TrmpAddr, 10, false, false, 0); Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(12, MVT::i64)); - OutChains[3] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 12, false, 2); + OutChains[3] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 12, + false, false, 2); // Jump to the nested function. OpCode = (JMP64r << 8) | REX_WB; // jmpq *... 
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(20, MVT::i64)); OutChains[4] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16), - Addr, TrmpAddr, 20); + Addr, TrmpAddr, 20, false, false, 0); unsigned char ModRM = N86R11 | (4 << 3) | (3 << 6); // ...r11 Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(22, MVT::i64)); OutChains[5] = DAG.getStore(Root, dl, DAG.getConstant(ModRM, MVT::i8), Addr, - TrmpAddr, 22); + TrmpAddr, 22, false, false, 0); SDValue Ops[] = { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 6) }; @@ -6903,25 +7117,28 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, DAG.getConstant(10, MVT::i32)); Disp = DAG.getNode(ISD::SUB, dl, MVT::i32, FPtr, Addr); - const unsigned char MOV32ri = TII->getBaseOpcodeFor(X86::MOV32ri); + // This is storing the opcode for MOV32ri. + const unsigned char MOV32ri = 0xB8; // X86::MOV32ri's opcode byte. const unsigned char N86Reg = RegInfo->getX86RegNum(NestReg); OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(MOV32ri|N86Reg, MVT::i8), - Trmp, TrmpAddr, 0); + Trmp, TrmpAddr, 0, false, false, 0); Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, DAG.getConstant(1, MVT::i32)); - OutChains[1] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 1, false, 1); + OutChains[1] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 1, + false, false, 1); - const unsigned char JMP = TII->getBaseOpcodeFor(X86::JMP); + const unsigned char JMP = 0xE9; // jmp <32bit dst> opcode. Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, DAG.getConstant(5, MVT::i32)); OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(JMP, MVT::i8), Addr, - TrmpAddr, 5, false, 1); + TrmpAddr, 5, false, false, 1); Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, DAG.getConstant(6, MVT::i32)); - OutChains[3] = DAG.getStore(Root, dl, Disp, Addr, TrmpAddr, 6, false, 1); + OutChains[3] = DAG.getStore(Root, dl, Disp, Addr, TrmpAddr, 6, + false, false, 1); SDValue Ops[] = { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 4) }; @@ -6964,7 +7181,8 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) { DAG.getEntryNode(), StackSlot); // Load FP Control Word from stack slot - SDValue CWD = DAG.getLoad(MVT::i16, dl, Chain, StackSlot, NULL, 0); + SDValue CWD = DAG.getLoad(MVT::i16, dl, Chain, StackSlot, NULL, 0, + false, false, 0); // Transform as necessary SDValue CWD1 = @@ -7238,6 +7456,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op,DAG); case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG); case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); + case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); @@ -7327,7 +7546,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, if (FIST.getNode() != 0) { EVT VT = N->getValueType(0); // Return a load from the stack slot. 
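The six stores in the 64-bit LowerTRAMPOLINE path above write a 23-byte stub; the offsets and bytes below follow the constants in the code (0x49 = REX.WB, 0xB8|reg = MOV64ri, FF /4 = jmp through register). The struct is only an illustration of the layout, not something the backend defines:

#include <cstdint>
struct __attribute__((packed)) Trampoline64 {
  uint8_t  rex0, movR11;   // +0:  49 BB   movabs $fptr, %r11
  uint64_t fptr;           // +2:  target function address
  uint8_t  rex1, movR10;   // +10: 49 BA   movabs $nest, %r10
  uint64_t nest;           // +12: the 'nest' parameter value
  uint8_t  rex2, jmp;      // +20: 41 FF   jmpq *...
  uint8_t  modrm;          // +22: E3 = mod 11 | /4 | rm = r11
};
static_assert(sizeof(Trampoline64) == 23, "matches the six stores");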
- Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot, NULL, 0)); + Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot, NULL, 0, + false, false, 0)); } return; } @@ -7345,14 +7565,6 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, Results.push_back(edx.getValue(1)); return; } - case ISD::SDIV: - case ISD::UDIV: - case ISD::SREM: - case ISD::UREM: { - EVT WidenVT = getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - Results.push_back(DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements())); - return; - } case ISD::ATOMIC_CMP_SWAP: { EVT T = N->getValueType(0); assert (T == MVT::i64 && "Only know how to expand i64 Cmp and Swap"); @@ -7551,7 +7763,7 @@ bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM, bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const { - if (!Ty1->isInteger() || !Ty2->isInteger()) + if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) return false; unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); @@ -7572,7 +7784,7 @@ bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { bool X86TargetLowering::isZExtFree(const Type *Ty1, const Type *Ty2) const { // x86-64 implicitly zero-extends 32-bit results in 64-bit registers. - return Ty1->isInteger(32) && Ty2->isInteger(64) && Subtarget->is64Bit(); + return Ty1->isIntegerTy(32) && Ty2->isIntegerTy(64) && Subtarget->is64Bit(); } bool X86TargetLowering::isZExtFree(EVT VT1, EVT VT2) const { @@ -7728,7 +7940,7 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, MIB.addReg(EAXreg); // insert branch - BuildMI(newMBB, dl, TII->get(X86::JNE)).addMBB(newMBB); + BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB); F->DeleteMachineInstr(bInstr); // The pseudo instruction is gone now. return nextMBB; @@ -7885,7 +8097,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, MIB.addReg(X86::EDX); // insert branch - BuildMI(newMBB, dl, TII->get(X86::JNE)).addMBB(newMBB); + BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB); F->DeleteMachineInstr(bInstr); // The pseudo instruction is gone now. return nextMBB; @@ -7988,7 +8200,7 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, MIB.addReg(X86::EAX); // insert branch - BuildMI(newMBB, dl, TII->get(X86::JNE)).addMBB(newMBB); + BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB); F->DeleteMachineInstr(mInstr); // The pseudo instruction is gone now. return nextMBB; @@ -8070,7 +8282,7 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter( if (!Subtarget->isTargetWin64()) { // If %al is 0, branch around the XMM save block. 
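Each atomic pseudo expanded above becomes the same loop shape: load, apply the operation, lock cmpxchg, then the JNE_4 back to the top while the exchange fails. The C++ equivalent of what EmitAtomicBitwiseWithCustomInserter builds for an OR (illustration of the semantics only):

#include <atomic>
#include <cstdint>
uint32_t atomic_or(std::atomic<uint32_t> &mem, uint32_t val) {
  uint32_t old = mem.load();                       // initial load
  while (!mem.compare_exchange_weak(old, old | val))
    ;  // lock cmpxchg failed: 'old' was reloaded; branch back (the JNE_4)
  return old;                                      // the pseudo's EAX result
}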
BuildMI(MBB, DL, TII->get(X86::TEST8rr)).addReg(CountReg).addReg(CountReg); - BuildMI(MBB, DL, TII->get(X86::JE)).addMBB(EndMBB); + BuildMI(MBB, DL, TII->get(X86::JE_4)).addMBB(EndMBB); MBB->addSuccessor(EndMBB); } @@ -8556,10 +8768,11 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, if (DAG.InferPtrAlignment(LD->getBasePtr()) >= 16) return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(), LD->getSrcValueOffset(), - LD->isVolatile()); + LD->isVolatile(), LD->isNonTemporal(), 0); return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(), LD->getSrcValueOffset(), - LD->isVolatile(), LD->getAlignment()); + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); } else if (NumElems == 4 && LastLoadedElt == 1) { SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other); SDValue Ops[] = { LD->getChain(), LD->getBasePtr() }; @@ -9278,7 +9491,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(), Ld->getSrcValue(), Ld->getSrcValueOffset(), Ld->isVolatile(), - Ld->getAlignment()); + Ld->isNonTemporal(), Ld->getAlignment()); SDValue NewChain = NewLd.getValue(1); if (TokenFactorIndex != -1) { Ops.push_back(NewChain); @@ -9287,7 +9500,8 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, } return DAG.getStore(NewChain, StDL, NewLd, St->getBasePtr(), St->getSrcValue(), St->getSrcValueOffset(), - St->isVolatile(), St->getAlignment()); + St->isVolatile(), St->isNonTemporal(), + St->getAlignment()); } // Otherwise, lower to two pairs of 32-bit loads / stores. @@ -9297,10 +9511,11 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, SDValue LoLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), LoAddr, Ld->getSrcValue(), Ld->getSrcValueOffset(), - Ld->isVolatile(), Ld->getAlignment()); + Ld->isVolatile(), Ld->isNonTemporal(), + Ld->getAlignment()); SDValue HiLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), HiAddr, Ld->getSrcValue(), Ld->getSrcValueOffset()+4, - Ld->isVolatile(), + Ld->isVolatile(), Ld->isNonTemporal(), MinAlign(Ld->getAlignment(), 4)); SDValue NewChain = LoLd.getValue(1); @@ -9317,11 +9532,13 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, SDValue LoSt = DAG.getStore(NewChain, StDL, LoLd, LoAddr, St->getSrcValue(), St->getSrcValueOffset(), - St->isVolatile(), St->getAlignment()); + St->isVolatile(), St->isNonTemporal(), + St->getAlignment()); SDValue HiSt = DAG.getStore(NewChain, StDL, HiLd, HiAddr, St->getSrcValue(), St->getSrcValueOffset() + 4, St->isVolatile(), + St->isNonTemporal(), MinAlign(St->getAlignment(), 4)); return DAG.getNode(ISD::TokenFactor, StDL, MVT::Other, LoSt, HiSt); } @@ -9504,7 +9721,7 @@ static bool LowerToBSwap(CallInst *CI) { // Verify this is a simple bswap. 
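The PerformSTORECombine fallback above splits an i64/f64 round trip into two 32-bit halves, and the alignment bookkeeping matters: the low half keeps the original alignment, while the half at +4 can only claim MinAlign(align, 4). In plain C++ terms:

#include <cstdint>
#include <cstring>
uint64_t load_i64_as_halves(const void *p) {
  uint32_t lo, hi;
  std::memcpy(&lo, p, 4);                       // alignment: Ld->getAlignment()
  std::memcpy(&hi, (const char *)p + 4, 4);     // alignment: MinAlign(align, 4)
  return ((uint64_t)hi << 32) | lo;
}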
if (CI->getNumOperands() != 2 || CI->getType() != CI->getOperand(1)->getType() || - !CI->getType()->isInteger()) + !CI->getType()->isIntegerTy()) return false; const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); @@ -9553,7 +9770,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { return LowerToBSwap(CI); } // rorw $$8, ${0:w} --> llvm.bswap.i16 - if (CI->getType()->isInteger(16) && + if (CI->getType()->isIntegerTy(16) && AsmPieces.size() == 3 && AsmPieces[0] == "rorw" && AsmPieces[1] == "$$8," && @@ -9563,7 +9780,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { } break; case 3: - if (CI->getType()->isInteger(64) && + if (CI->getType()->isIntegerTy(64) && Constraints.size() >= 2 && Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" && Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") { diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 64bc70c..cf0eb40 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -19,6 +19,7 @@ #include "X86RegisterInfo.h" #include "X86MachineFunctionInfo.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetOptions.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/CallingConvLower.h" @@ -156,6 +157,11 @@ namespace llvm { /// relative displacements. WrapperRIP, + /// MOVQ2DQ - Copies a 64-bit value from a vector to another vector. + /// Can be used to move a vector value from a MMX register to a XMM + /// register. + MOVQ2DQ, + /// PEXTRB - Extract an 8-bit value from a vector and zero extend it to /// i32, corresponds to X86::PEXTRB. PEXTRB, @@ -366,25 +372,33 @@ namespace llvm { unsigned VarArgsGPOffset; // X86-64 vararg func int reg offset. unsigned VarArgsFPOffset; // X86-64 vararg func fp reg offset. int BytesToPopOnReturn; // Number of arg bytes ret should pop. - int BytesCallerReserves; // Number of arg bytes caller makes. public: explicit X86TargetLowering(X86TargetMachine &TM); + /// getPICBaseSymbol - Return the X86-32 PIC base. + MCSymbol *getPICBaseSymbol(const MachineFunction *MF, MCContext &Ctx) const; + + virtual unsigned getJumpTableEncoding() const; + + virtual const MCExpr * + LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, + const MachineBasicBlock *MBB, unsigned uid, + MCContext &Ctx) const; + /// getPICJumpTableRelocaBase - Returns relocation base for the given PIC /// jumptable. - SDValue getPICJumpTableRelocBase(SDValue Table, - SelectionDAG &DAG) const; - + virtual SDValue getPICJumpTableRelocBase(SDValue Table, + SelectionDAG &DAG) const; + virtual const MCExpr * + getPICJumpTableRelocBaseExpr(const MachineFunction *MF, + unsigned JTI, MCContext &Ctx) const; + // Return the number of bytes that a function should pop when it returns (in // addition to the space used by the return address). // unsigned getBytesToPopOnReturn() const { return BytesToPopOnReturn; } - // Return the number of bytes that the caller reserves for arguments passed - // to this function. - unsigned getBytesCallerReserves() const { return BytesCallerReserves; } - /// getStackPtrReg - Return the stack pointer register we are using: either /// ESP or RSP. unsigned getStackPtrReg() const { return X86StackPtr; } @@ -532,16 +546,6 @@ namespace llvm { return !X86ScalarSSEf64 || VT == MVT::f80; } - /// IsEligibleForTailCallOptimization - Check whether the call is eligible - /// for tail call optimization. 
Targets which want to do tail call - /// optimization should implement this function. - virtual bool - IsEligibleForTailCallOptimization(SDValue Callee, - CallingConv::ID CalleeCC, - bool isVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, - SelectionDAG& DAG) const; - virtual const X86Subtarget* getSubtarget() { return Subtarget; } @@ -619,13 +623,22 @@ namespace llvm { ISD::ArgFlagsTy Flags); // Call lowering helpers. + + /// IsEligibleForTailCallOptimization - Check whether the call is eligible + /// for tail call optimization. Targets which want to do tail call + /// optimization should implement this function. + bool IsEligibleForTailCallOptimization(SDValue Callee, + CallingConv::ID CalleeCC, + bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<ISD::InputArg> &Ins, + SelectionDAG& DAG) const; bool IsCalleePop(bool isVarArg, CallingConv::ID CallConv); SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall, bool Is64Bit, int FPDiff, DebugLoc dl); CCAssignFn *CCAssignFnForNode(CallingConv::ID CallConv) const; - NameDecorationStyle NameDecorationForCallConv(CallingConv::ID CallConv); unsigned GetAlignedArgumentStackSize(unsigned StackSize, SelectionDAG &DAG); std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, @@ -634,6 +647,7 @@ namespace llvm { SDValue LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl, SelectionDAG &DAG); SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG); + SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG); SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG); SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG); SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG); @@ -693,7 +707,7 @@ namespace llvm { SmallVectorImpl<SDValue> &InVals); virtual SDValue LowerCall(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, bool isTailCall, + CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index 9037ba6..4ea3739 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -187,7 +187,7 @@ def TCRETURNri64 : I<0, Pseudo, (outs), (ins GR64:$dst, i32imm:$offset, let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in - def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins GR64:$dst), + def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins GR64:$dst, variable_ops), "jmp{q}\t{*}$dst # TAILCALL", []>; @@ -435,7 +435,7 @@ def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src), // up to 64 bits. 
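isZExtFree(i32, i64) and the def32 PatLeaf above both rest on the same architectural fact: on x86-64, any instruction that writes a 32-bit register zeroes bits 63..32, so the zero-extension costs no extra instruction. For example:

#include <cstdint>
uint64_t sum64(uint32_t a, uint32_t b) {
  uint32_t s = a + b;   // addl already cleared the upper 32 bits
  return s;             // no movzx/and is needed for the extension
}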
def def32 : PatLeaf<(i32 GR32:$src), [{ return N->getOpcode() != ISD::TRUNCATE && - N->getOpcode() != TargetInstrInfo::EXTRACT_SUBREG && + N->getOpcode() != TargetOpcode::EXTRACT_SUBREG && N->getOpcode() != ISD::CopyFromReg && N->getOpcode() != X86ISD::CMOV; }]>; @@ -893,35 +893,38 @@ def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst), let isTwoAddress = 1 in { def RCL64r1 : RI<0xD1, MRM2r, (outs GR64:$dst), (ins GR64:$src), "rcl{q}\t{1, $dst|$dst, 1}", []>; -def RCL64m1 : RI<0xD1, MRM2m, (outs i64mem:$dst), (ins i64mem:$src), - "rcl{q}\t{1, $dst|$dst, 1}", []>; -let Uses = [CL] in { -def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src), - "rcl{q}\t{%cl, $dst|$dst, CL}", []>; -def RCL64mCL : RI<0xD3, MRM2m, (outs i64mem:$dst), (ins i64mem:$src), - "rcl{q}\t{%cl, $dst|$dst, CL}", []>; -} def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt), "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCL64mi : RIi8<0xC1, MRM2m, (outs i64mem:$dst), - (ins i64mem:$src, i8imm:$cnt), - "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>; def RCR64r1 : RI<0xD1, MRM3r, (outs GR64:$dst), (ins GR64:$src), "rcr{q}\t{1, $dst|$dst, 1}", []>; -def RCR64m1 : RI<0xD1, MRM3m, (outs i64mem:$dst), (ins i64mem:$src), - "rcr{q}\t{1, $dst|$dst, 1}", []>; +def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt), + "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>; + let Uses = [CL] in { +def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src), + "rcl{q}\t{%cl, $dst|$dst, CL}", []>; def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src), "rcr{q}\t{%cl, $dst|$dst, CL}", []>; -def RCR64mCL : RI<0xD3, MRM3m, (outs i64mem:$dst), (ins i64mem:$src), - "rcr{q}\t{%cl, $dst|$dst, CL}", []>; } -def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt), - "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCR64mi : RIi8<0xC1, MRM3m, (outs i64mem:$dst), - (ins i64mem:$src, i8imm:$cnt), +} + +let isTwoAddress = 0 in { +def RCL64m1 : RI<0xD1, MRM2m, (outs), (ins i64mem:$dst), + "rcl{q}\t{1, $dst|$dst, 1}", []>; +def RCL64mi : RIi8<0xC1, MRM2m, (outs), (ins i64mem:$dst, i8imm:$cnt), + "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>; +def RCR64m1 : RI<0xD1, MRM3m, (outs), (ins i64mem:$dst), + "rcr{q}\t{1, $dst|$dst, 1}", []>; +def RCR64mi : RIi8<0xC1, MRM3m, (outs), (ins i64mem:$dst, i8imm:$cnt), "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>; + +let Uses = [CL] in { +def RCL64mCL : RI<0xD3, MRM2m, (outs), (ins i64mem:$dst), + "rcl{q}\t{%cl, $dst|$dst, CL}", []>; +def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst), + "rcr{q}\t{%cl, $dst|$dst, CL}", []>; +} } let isTwoAddress = 1 in { @@ -1466,9 +1469,13 @@ def CMOVNO64rm : RI<0x41, MRMSrcMem, // if !overflow, GR64 = [mem64] } // isTwoAddress // Use sbb to materialize carry flag into a GPR. +// FIXME: These are pseudo ops that should be replaced with Pat<> patterns. +// However, Pat<> can't replicate the destination reg into the inputs of the +// result. +// FIXME: Change this to have encoding Pseudo when X86MCCodeEmitter replaces +// X86CodeEmitter. let Defs = [EFLAGS], Uses = [EFLAGS], isCodeGenOnly = 1 in -def SETB_C64r : RI<0x19, MRMInitReg, (outs GR64:$dst), (ins), - "sbb{q}\t$dst, $dst", +def SETB_C64r : RI<0x19, MRMInitReg, (outs GR64:$dst), (ins), "", [(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>; def : Pat<(i64 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), @@ -1606,8 +1613,7 @@ def SLDT64m : RI<0x00, MRM0m, (outs i16mem:$dst), (ins), // when we have a better way to specify isel priority.
let Defs = [EFLAGS], AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in -def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), - "", +def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "", [(set GR64:$dst, 0)]>; // Materialize i64 constant where top 32-bits are zero. This could theoretically @@ -1768,7 +1774,7 @@ def LSL64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB; -def SWPGS : I<0x01, RawFrm, (outs), (ins), "swpgs", []>, TB; +def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", []>, TB; def PUSHFS64 : I<0xa0, RawFrm, (outs), (ins), "push{q}\t%fs", []>, TB; diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td index 71ec178..e22a903 100644 --- a/lib/Target/X86/X86InstrFPStack.td +++ b/lib/Target/X86/X86InstrFPStack.td @@ -339,7 +339,6 @@ def FICOMP32m: FPI<0xDA, MRM3m, (outs), (ins i32mem:$src), "ficomp{l}\t$src">; def FCOM64m : FPI<0xDC, MRM2m, (outs), (ins f64mem:$src), "fcom{ll}\t$src">; def FCOMP64m : FPI<0xDC, MRM3m, (outs), (ins f64mem:$src), "fcomp{ll}\t$src">; -def FISTTP32m: FPI<0xDD, MRM1m, (outs i32mem:$dst), (ins), "fisttp{l}\t$dst">; def FRSTORm : FPI<0xDD, MRM4m, (outs f32mem:$dst), (ins), "frstor\t$dst">; def FSAVEm : FPI<0xDD, MRM6m, (outs f32mem:$dst), (ins), "fnsave\t$dst">; def FNSTSWm : FPI<0xDD, MRM7m, (outs f32mem:$dst), (ins), "fnstsw\t$dst">; diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index a799f16..bb81cbf 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -29,7 +29,16 @@ def MRM0m : Format<24>; def MRM1m : Format<25>; def MRM2m : Format<26>; def MRM3m : Format<27>; def MRM4m : Format<28>; def MRM5m : Format<29>; def MRM6m : Format<30>; def MRM7m : Format<31>; def MRMInitReg : Format<32>; - +def MRM_C1 : Format<33>; +def MRM_C2 : Format<34>; +def MRM_C3 : Format<35>; +def MRM_C4 : Format<36>; +def MRM_C8 : Format<37>; +def MRM_C9 : Format<38>; +def MRM_E8 : Format<39>; +def MRM_F0 : Format<40>; +def MRM_F8 : Format<41>; +def MRM_F9 : Format<42>; // ImmType - This specifies the immediate type used by an instruction. This is // part of the ad-hoc solution used to emit machine instruction encodings by our @@ -37,11 +46,13 @@ def MRMInitReg : Format<32>; class ImmType<bits<3> val> { bits<3> Value = val; } -def NoImm : ImmType<0>; -def Imm8 : ImmType<1>; -def Imm16 : ImmType<2>; -def Imm32 : ImmType<3>; -def Imm64 : ImmType<4>; +def NoImm : ImmType<0>; +def Imm8 : ImmType<1>; +def Imm8PCRel : ImmType<2>; +def Imm16 : ImmType<3>; +def Imm32 : ImmType<4>; +def Imm32PCRel : ImmType<5>; +def Imm64 : ImmType<6>; // FPFormat - This specifies what form this FP instruction has. This is used by // the Floating-Point stackifier pass. 
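The new Imm8PCRel and Imm32PCRel immediate kinds tag operands whose stored value is relative to the end of the instruction. A sketch of the fixup arithmetic an emitter applies for them (the function name is illustrative):

#include <cstdint>
int32_t pcRelDisp(uint64_t Target, uint64_t InstAddr, unsigned InstLen) {
  // The rel8/rel32 payload: distance from the byte after the instruction.
  return (int32_t)(Target - (InstAddr + InstLen));
}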
@@ -121,6 +132,12 @@ class Ii8 <bits<8> o, Format f, dag outs, dag ins, string asm, let Pattern = pattern; let CodeSize = 3; } +class Ii8PCRel<bits<8> o, Format f, dag outs, dag ins, string asm, + list<dag> pattern> + : X86Inst<o, f, Imm8PCRel, outs, ins, asm> { + let Pattern = pattern; + let CodeSize = 3; +} class Ii16<bits<8> o, Format f, dag outs, dag ins, string asm, list<dag> pattern> : X86Inst<o, f, Imm16, outs, ins, asm> { @@ -134,6 +151,13 @@ class Ii32<bits<8> o, Format f, dag outs, dag ins, string asm, let CodeSize = 3; } +class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm, + list<dag> pattern> + : X86Inst<o, f, Imm32PCRel, outs, ins, asm> { + let Pattern = pattern; + let CodeSize = 3; +} + // FPStack Instruction Templates: // FPI - Floating Point Instruction template. class FPI<bits<8> o, Format F, dag outs, dag ins, string asm> diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td new file mode 100644 index 0000000..6b9478d --- /dev/null +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -0,0 +1,62 @@ +//======- X86InstrFragmentsSIMD.td - x86 ISA -------------*- tablegen -*-=====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides pattern fragments useful for SIMD instructions. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MMX Pattern Fragments +//===----------------------------------------------------------------------===// + +def load_mmx : PatFrag<(ops node:$ptr), (v1i64 (load node:$ptr))>; + +def bc_v8i8 : PatFrag<(ops node:$in), (v8i8 (bitconvert node:$in))>; +def bc_v4i16 : PatFrag<(ops node:$in), (v4i16 (bitconvert node:$in))>; +def bc_v2i32 : PatFrag<(ops node:$in), (v2i32 (bitconvert node:$in))>; +def bc_v1i64 : PatFrag<(ops node:$in), (v1i64 (bitconvert node:$in))>; + +//===----------------------------------------------------------------------===// +// MMX Masks +//===----------------------------------------------------------------------===// + +// MMX_SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to +// PSHUFW imm. 
+def MMX_SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{ + return getI8Imm(X86::getShuffleSHUFImmediate(N)); +}]>; + +// Patterns for: vector_shuffle v1, v2, <2, 6, 3, 7, ...> +def mmx_unpckh : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N)); +}]>; + +// Patterns for: vector_shuffle v1, v2, <0, 4, 2, 5, ...> +def mmx_unpckl : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N)); +}]>; + +// Patterns for: vector_shuffle v1, <undef>, <0, 0, 1, 1, ...> +def mmx_unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isUNPCKH_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); +}]>; + +// Patterns for: vector_shuffle v1, <undef>, <2, 2, 3, 3, ...> +def mmx_unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isUNPCKL_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); +}]>; + +def mmx_pshufw : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N)); +}], MMX_SHUFFLE_get_shuf_imm>; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 3ae352c..a0d0312 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -1060,8 +1060,7 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig, const TargetRegisterInfo *TRI) const { - DebugLoc DL = DebugLoc::getUnknownLoc(); - if (I != MBB.end()) DL = I->getDebugLoc(); + DebugLoc DL = MBB.findDebugLoc(I); if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) { DestReg = TRI->getSubReg(DestReg, SubIdx); @@ -1588,44 +1587,44 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { static X86::CondCode GetCondFromBranchOpc(unsigned BrOpc) { switch (BrOpc) { default: return X86::COND_INVALID; - case X86::JE: return X86::COND_E; - case X86::JNE: return X86::COND_NE; - case X86::JL: return X86::COND_L; - case X86::JLE: return X86::COND_LE; - case X86::JG: return X86::COND_G; - case X86::JGE: return X86::COND_GE; - case X86::JB: return X86::COND_B; - case X86::JBE: return X86::COND_BE; - case X86::JA: return X86::COND_A; - case X86::JAE: return X86::COND_AE; - case X86::JS: return X86::COND_S; - case X86::JNS: return X86::COND_NS; - case X86::JP: return X86::COND_P; - case X86::JNP: return X86::COND_NP; - case X86::JO: return X86::COND_O; - case X86::JNO: return X86::COND_NO; + case X86::JE_4: return X86::COND_E; + case X86::JNE_4: return X86::COND_NE; + case X86::JL_4: return X86::COND_L; + case X86::JLE_4: return X86::COND_LE; + case X86::JG_4: return X86::COND_G; + case X86::JGE_4: return X86::COND_GE; + case X86::JB_4: return X86::COND_B; + case X86::JBE_4: return X86::COND_BE; + case X86::JA_4: return X86::COND_A; + case X86::JAE_4: return X86::COND_AE; + case X86::JS_4: return X86::COND_S; + case X86::JNS_4: return X86::COND_NS; + case X86::JP_4: return X86::COND_P; + case X86::JNP_4: return X86::COND_NP; + case X86::JO_4: return X86::COND_O; + case X86::JNO_4: return X86::COND_NO; } } unsigned X86::GetCondBranchFromCond(X86::CondCode CC) { switch (CC) { default: llvm_unreachable("Illegal condition code!"); - case X86::COND_E: return X86::JE; - case X86::COND_NE: return X86::JNE; - case X86::COND_L: return X86::JL; - case X86::COND_LE: return X86::JLE; - case 
X86::COND_G: return X86::JG; - case X86::COND_GE: return X86::JGE; - case X86::COND_B: return X86::JB; - case X86::COND_BE: return X86::JBE; - case X86::COND_A: return X86::JA; - case X86::COND_AE: return X86::JAE; - case X86::COND_S: return X86::JS; - case X86::COND_NS: return X86::JNS; - case X86::COND_P: return X86::JP; - case X86::COND_NP: return X86::JNP; - case X86::COND_O: return X86::JO; - case X86::COND_NO: return X86::JNO; + case X86::COND_E: return X86::JE_4; + case X86::COND_NE: return X86::JNE_4; + case X86::COND_L: return X86::JL_4; + case X86::COND_LE: return X86::JLE_4; + case X86::COND_G: return X86::JG_4; + case X86::COND_GE: return X86::JGE_4; + case X86::COND_B: return X86::JB_4; + case X86::COND_BE: return X86::JBE_4; + case X86::COND_A: return X86::JA_4; + case X86::COND_AE: return X86::JAE_4; + case X86::COND_S: return X86::JS_4; + case X86::COND_NS: return X86::JNS_4; + case X86::COND_P: return X86::JP_4; + case X86::COND_NP: return X86::JNP_4; + case X86::COND_O: return X86::JO_4; + case X86::COND_NO: return X86::JNO_4; } } @@ -1695,7 +1694,7 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, return true; // Handle unconditional branches. - if (I->getOpcode() == X86::JMP) { + if (I->getOpcode() == X86::JMP_4) { if (!AllowModify) { TBB = I->getOperand(0).getMBB(); continue; @@ -1779,7 +1778,7 @@ unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { while (I != MBB.begin()) { --I; - if (I->getOpcode() != X86::JMP && + if (I->getOpcode() != X86::JMP_4 && GetCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID) break; // Remove the branch. @@ -1805,7 +1804,7 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, if (Cond.empty()) { // Unconditional branch? assert(!FBB && "Unconditional branch with multiple successors!"); - BuildMI(&MBB, dl, get(X86::JMP)).addMBB(TBB); + BuildMI(&MBB, dl, get(X86::JMP_4)).addMBB(TBB); return 1; } @@ -1815,16 +1814,16 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, switch (CC) { case X86::COND_NP_OR_E: // Synthesize NP_OR_E with two branches. - BuildMI(&MBB, dl, get(X86::JNP)).addMBB(TBB); + BuildMI(&MBB, dl, get(X86::JNP_4)).addMBB(TBB); ++Count; - BuildMI(&MBB, dl, get(X86::JE)).addMBB(TBB); + BuildMI(&MBB, dl, get(X86::JE_4)).addMBB(TBB); ++Count; break; case X86::COND_NE_OR_P: // Synthesize NE_OR_P with two branches. - BuildMI(&MBB, dl, get(X86::JNE)).addMBB(TBB); + BuildMI(&MBB, dl, get(X86::JNE_4)).addMBB(TBB); ++Count; - BuildMI(&MBB, dl, get(X86::JP)).addMBB(TBB); + BuildMI(&MBB, dl, get(X86::JP_4)).addMBB(TBB); ++Count; break; default: { @@ -1835,7 +1834,7 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, } if (FBB) { // Two-way Conditional branch. Insert the second branch. - BuildMI(&MBB, dl, get(X86::JMP)).addMBB(FBB); + BuildMI(&MBB, dl, get(X86::JMP_4)).addMBB(FBB); ++Count; } return Count; @@ -1851,8 +1850,7 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, const TargetRegisterClass *SrcRC) const { - DebugLoc DL = DebugLoc::getUnknownLoc(); - if (MI != MBB.end()) DL = MI->getDebugLoc(); + DebugLoc DL = MBB.findDebugLoc(MI); // Determine if DstRC and SrcRC have a common superclass in common. 
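The two-jump synthesis in InsertBranch above exists because x86 FP compares report "unordered" through the parity flag: after ucomiss, no single Jcc can test NE-or-P or NP-or-E, so the code plants two branches to the same target (JNE_4 then JP_4, or JNP_4 then JE_4). The C source pattern that needs it:

bool fp_ne(float a, float b) {
  return a != b;   // true when unequal *or* unordered; lowers to jne + jp
}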
const TargetRegisterClass *CommonRC = DestRC; @@ -2079,8 +2077,7 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, const MachineFunction &MF = *MBB.getParent(); bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF); unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); - DebugLoc DL = DebugLoc::getUnknownLoc(); - if (MI != MBB.end()) DL = MI->getDebugLoc(); + DebugLoc DL = MBB.findDebugLoc(MI); addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx) .addReg(SrcReg, getKillRegState(isKill)); } @@ -2173,8 +2170,7 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, const MachineFunction &MF = *MBB.getParent(); bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF); unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); - DebugLoc DL = DebugLoc::getUnknownLoc(); - if (MI != MBB.end()) DL = MI->getDebugLoc(); + DebugLoc DL = MBB.findDebugLoc(MI); addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx); } @@ -3018,22 +3014,11 @@ isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const { RC == &X86::RFP64RegClass || RC == &X86::RFP80RegClass); } -unsigned X86InstrInfo::sizeOfImm(const TargetInstrDesc *Desc) { - switch (Desc->TSFlags & X86II::ImmMask) { - case X86II::Imm8: return 1; - case X86II::Imm16: return 2; - case X86II::Imm32: return 4; - case X86II::Imm64: return 8; - default: llvm_unreachable("Immediate size not set!"); - return 0; - } -} -/// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended register? -/// e.g. r8, xmm8, etc. -bool X86InstrInfo::isX86_64ExtendedReg(const MachineOperand &MO) { - if (!MO.isReg()) return false; - switch (MO.getReg()) { +/// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended (r8 or higher) +/// register? e.g. r8, xmm8, xmm13, etc. +bool X86InstrInfo::isX86_64ExtendedReg(unsigned RegNo) { + switch (RegNo) { default: break; case X86::R8: case X86::R9: case X86::R10: case X86::R11: case X86::R12: case X86::R13: case X86::R14: case X86::R15: @@ -3387,24 +3372,24 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, switch (Opcode) { default: break; - case TargetInstrInfo::INLINEASM: { + case TargetOpcode::INLINEASM: { const MachineFunction *MF = MI.getParent()->getParent(); const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo(); FinalSize += TII.getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MF->getTarget().getMCAsmInfo()); break; } - case TargetInstrInfo::DBG_LABEL: - case TargetInstrInfo::EH_LABEL: + case TargetOpcode::DBG_LABEL: + case TargetOpcode::EH_LABEL: break; - case TargetInstrInfo::IMPLICIT_DEF: - case TargetInstrInfo::KILL: + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::KILL: case X86::FP_REG_KILL: break; case X86::MOVPC32r: { // This emits the "call" portion of this pseudo instruction. 
++FinalSize; - FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc)); + FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags)); break; } } @@ -3422,7 +3407,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, } else if (MO.isSymbol()) { FinalSize += sizeExternalSymbolAddress(false); } else if (MO.isImm()) { - FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc)); + FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags)); } else { llvm_unreachable("Unknown RawFrm operand!"); } @@ -3435,7 +3420,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, if (CurOp != NumOps) { const MachineOperand &MO1 = MI.getOperand(CurOp++); - unsigned Size = X86InstrInfo::sizeOfImm(Desc); + unsigned Size = X86II::getSizeOfImm(Desc->TSFlags); if (MO1.isImm()) FinalSize += sizeConstant(Size); else { @@ -3460,7 +3445,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, CurOp += 2; if (CurOp != NumOps) { ++CurOp; - FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc)); + FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags)); } break; } @@ -3470,7 +3455,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, CurOp += X86AddrNumOperands + 1; if (CurOp != NumOps) { ++CurOp; - FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc)); + FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags)); } break; } @@ -3481,7 +3466,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, CurOp += 2; if (CurOp != NumOps) { ++CurOp; - FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc)); + FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags)); } break; @@ -3498,7 +3483,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, CurOp += AddrOperands + 1; if (CurOp != NumOps) { ++CurOp; - FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc)); + FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags)); } break; } @@ -3523,7 +3508,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, if (CurOp != NumOps) { const MachineOperand &MO1 = MI.getOperand(CurOp++); - unsigned Size = X86InstrInfo::sizeOfImm(Desc); + unsigned Size = X86II::getSizeOfImm(Desc->TSFlags); if (MO1.isImm()) FinalSize += sizeConstant(Size); else { @@ -3553,7 +3538,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, if (CurOp != NumOps) { const MachineOperand &MO = MI.getOperand(CurOp++); - unsigned Size = X86InstrInfo::sizeOfImm(Desc); + unsigned Size = X86II::getSizeOfImm(Desc->TSFlags); if (MO.isImm()) FinalSize += sizeConstant(Size); else { @@ -3571,6 +3556,14 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, } } break; + + case X86II::MRM_C1: + case X86II::MRM_C8: + case X86II::MRM_C9: + case X86II::MRM_E8: + case X86II::MRM_F0: + FinalSize += 2; + break; } case X86II::MRMInitReg: @@ -3619,8 +3612,7 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const { // Insert the set of GlobalBaseReg into the first MBB of the function MachineBasicBlock &FirstMBB = MF->front(); MachineBasicBlock::iterator MBBI = FirstMBB.begin(); - DebugLoc DL = DebugLoc::getUnknownLoc(); - if (MBBI != FirstMBB.end()) DL = MBBI->getDebugLoc(); + DebugLoc DL = FirstMBB.findDebugLoc(MBBI); MachineRegisterInfo &RegInfo = MF->getRegInfo(); unsigned PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass); diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index 4f35d0d..5111719 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -18,7 +18,6 @@ #include "X86.h" #include 
"X86RegisterInfo.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/Target/TargetRegisterInfo.h" namespace llvm { class X86RegisterInfo; @@ -269,6 +268,18 @@ namespace X86II { // MRMInitReg - This form is used for instructions whose source and // destinations are the same register. MRMInitReg = 32, + + //// MRM_C1 - A mod/rm byte of exactly 0xC1. + MRM_C1 = 33, + MRM_C2 = 34, + MRM_C3 = 35, + MRM_C4 = 36, + MRM_C8 = 37, + MRM_C9 = 38, + MRM_E8 = 39, + MRM_F0 = 40, + MRM_F8 = 41, + MRM_F9 = 42, FormMask = 63, @@ -332,11 +343,13 @@ namespace X86II { // This three-bit field describes the size of an immediate operand. Zero is // unused so that we can tell if we forgot to set a value. ImmShift = 13, - ImmMask = 7 << ImmShift, - Imm8 = 1 << ImmShift, - Imm16 = 2 << ImmShift, - Imm32 = 3 << ImmShift, - Imm64 = 4 << ImmShift, + ImmMask = 7 << ImmShift, + Imm8 = 1 << ImmShift, + Imm8PCRel = 2 << ImmShift, + Imm16 = 3 << ImmShift, + Imm32 = 4 << ImmShift, + Imm32PCRel = 5 << ImmShift, + Imm64 = 6 << ImmShift, //===------------------------------------------------------------------===// // FP Instruction Classification... Zero is non-fp instruction. @@ -389,6 +402,47 @@ namespace X86II { OpcodeShift = 24, OpcodeMask = 0xFF << OpcodeShift }; + + // getBaseOpcodeFor - This function returns the "base" X86 opcode for the + // specified machine instruction. + // + static inline unsigned char getBaseOpcodeFor(unsigned TSFlags) { + return TSFlags >> X86II::OpcodeShift; + } + + static inline bool hasImm(unsigned TSFlags) { + return (TSFlags & X86II::ImmMask) != 0; + } + + /// getSizeOfImm - Decode the "size of immediate" field from the TSFlags field + /// of the specified instruction. + static inline unsigned getSizeOfImm(unsigned TSFlags) { + switch (TSFlags & X86II::ImmMask) { + default: assert(0 && "Unknown immediate size"); + case X86II::Imm8: + case X86II::Imm8PCRel: return 1; + case X86II::Imm16: return 2; + case X86II::Imm32: + case X86II::Imm32PCRel: return 4; + case X86II::Imm64: return 8; + } + } + + /// isImmPCRel - Return true if the immediate of the specified instruction's + /// TSFlags indicates that it is pc relative. + static inline unsigned isImmPCRel(unsigned TSFlags) { + switch (TSFlags & X86II::ImmMask) { + default: assert(0 && "Unknown immediate size"); + case X86II::Imm8PCRel: + case X86II::Imm32PCRel: + return true; + case X86II::Imm8: + case X86II::Imm16: + case X86II::Imm32: + case X86II::Imm64: + return false; + } + } } const int X86AddrNumOperands = 5; @@ -637,25 +691,21 @@ public: /// instruction that defines the specified register class. bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const; - // getBaseOpcodeFor - This function returns the "base" X86 opcode for the - // specified machine instruction. - // - unsigned char getBaseOpcodeFor(const TargetInstrDesc *TID) const { - return TID->TSFlags >> X86II::OpcodeShift; - } - unsigned char getBaseOpcodeFor(unsigned Opcode) const { - return getBaseOpcodeFor(&get(Opcode)); - } - static bool isX86_64NonExtLowByteReg(unsigned reg) { return (reg == X86::SPL || reg == X86::BPL || reg == X86::SIL || reg == X86::DIL); } - static unsigned sizeOfImm(const TargetInstrDesc *Desc); - static bool isX86_64ExtendedReg(const MachineOperand &MO); + static bool isX86_64ExtendedReg(const MachineOperand &MO) { + if (!MO.isReg()) return false; + return isX86_64ExtendedReg(MO.getReg()); + } static unsigned determineREX(const MachineInstr &MI); + /// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended (r8 or + /// higher) register? 
e.g. r8, xmm8, xmm13, etc. + static bool isX86_64ExtendedReg(unsigned RegNo); + /// GetInstSize - Returns the size of the specified MachineInstr. /// virtual unsigned GetInstSizeInBytes(const MachineInstr *MI) const; diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 396cb53..25cd297 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -182,10 +182,6 @@ def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>; // X86 Operand Definitions. // -def i32imm_pcrel : Operand<i32> { - let PrintMethod = "print_pcrel_imm"; -} - // A version of ptr_rc which excludes SP, ESP, and RSP. This is used for // the index operand of an address, to conform to x86 encoding restrictions. def ptr_rc_nosp : PointerLikeRegClass<1>; @@ -196,6 +192,14 @@ def X86MemAsmOperand : AsmOperandClass { let Name = "Mem"; let SuperClass = ?; } +def X86AbsMemAsmOperand : AsmOperandClass { + let Name = "AbsMem"; + let SuperClass = X86MemAsmOperand; +} +def X86NoSegMemAsmOperand : AsmOperandClass { + let Name = "NoSegMem"; + let SuperClass = X86MemAsmOperand; +} class X86MemOperand<string printMethod> : Operand<iPTR> { let PrintMethod = printMethod; let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm); @@ -207,11 +211,6 @@ def opaque48mem : X86MemOperand<"printopaquemem">; def opaque80mem : X86MemOperand<"printopaquemem">; def opaque512mem : X86MemOperand<"printopaquemem">; -def offset8 : Operand<i64> { let PrintMethod = "print_pcrel_imm"; } -def offset16 : Operand<i64> { let PrintMethod = "print_pcrel_imm"; } -def offset32 : Operand<i64> { let PrintMethod = "print_pcrel_imm"; } -def offset64 : Operand<i64> { let PrintMethod = "print_pcrel_imm"; } - def i8mem : X86MemOperand<"printi8mem">; def i16mem : X86MemOperand<"printi16mem">; def i32mem : X86MemOperand<"printi32mem">; @@ -235,7 +234,22 @@ def i8mem_NOREX : Operand<i64> { def lea32mem : Operand<i32> { let PrintMethod = "printlea32mem"; let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm); - let ParserMatchClass = X86MemAsmOperand; + let ParserMatchClass = X86NoSegMemAsmOperand; +} + +let ParserMatchClass = X86AbsMemAsmOperand, + PrintMethod = "print_pcrel_imm" in { +def i32imm_pcrel : Operand<i32>; + +def offset8 : Operand<i64>; +def offset16 : Operand<i64>; +def offset32 : Operand<i64>; +def offset64 : Operand<i64>; + +// Branch targets have OtherVT type and print as pc-relative values. +def brtarget : Operand<OtherVT>; +def brtarget8 : Operand<OtherVT>; + } def SSECC : Operand<i8> { @@ -257,15 +271,6 @@ def i32i8imm : Operand<i32> { let ParserMatchClass = ImmSExt8AsmOperand; } -// Branch targets have OtherVT type and print as pc-relative values. -def brtarget : Operand<OtherVT> { - let PrintMethod = "print_pcrel_imm"; -} - -def brtarget8 : Operand<OtherVT> { - let PrintMethod = "print_pcrel_imm"; -} - //===----------------------------------------------------------------------===// // X86 Complex Pattern Definitions. // @@ -591,7 +596,7 @@ let neverHasSideEffects = 1, isNotDuplicable = 1, Uses = [ESP] in "", []>; //===----------------------------------------------------------------------===// -// Control Flow Instructions... +// Control Flow Instructions. // // Return instructions. 
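isX86_64ExtendedReg feeds determineREX: r8-r15 and xmm8-xmm15 do not fit in the 3-bit ModRM fields, so each use contributes a bit to the REX prefix. A sketch of the prefix layout (0100WRXB; bit meanings per the x86-64 encoding, names illustrative):

#include <cstdint>
uint8_t rexByte(bool W, bool R, bool X, bool B) {
  // W = 64-bit operand size, R = ModRM.reg extension,
  // X = SIB.index extension,  B = ModRM.rm/SIB.base extension
  return (uint8_t)(0x40 | (W << 3) | (R << 2) | (X << 1) | (int)B);
}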
@@ -609,16 +614,46 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1, "lret\t$amt", []>; } -// All branches are RawFrm, Void, Branch, and Terminators -let isBranch = 1, isTerminator = 1 in - class IBr<bits<8> opcode, dag ins, string asm, list<dag> pattern> : - I<opcode, RawFrm, (outs), ins, asm, pattern>; +// Unconditional branches. +let isBarrier = 1, isBranch = 1, isTerminator = 1 in { + def JMP_4 : Ii32PCRel<0xE9, RawFrm, (outs), (ins brtarget:$dst), + "jmp\t$dst", [(br bb:$dst)]>; + def JMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst), + "jmp\t$dst", []>; +} -let isBranch = 1, isBarrier = 1 in { - def JMP : IBr<0xE9, (ins brtarget:$dst), "jmp\t$dst", [(br bb:$dst)]>; - def JMP8 : IBr<0xEB, (ins brtarget8:$dst), "jmp\t$dst", []>; +// Conditional Branches. +let isBranch = 1, isTerminator = 1, Uses = [EFLAGS] in { + multiclass ICBr<bits<8> opc1, bits<8> opc4, string asm, PatFrag Cond> { + def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm, []>; + def _4 : Ii32PCRel<opc4, RawFrm, (outs), (ins brtarget:$dst), asm, + [(X86brcond bb:$dst, Cond, EFLAGS)]>, TB; + } } +defm JO : ICBr<0x70, 0x80, "jo\t$dst" , X86_COND_O>; +defm JNO : ICBr<0x71, 0x81, "jno\t$dst" , X86_COND_NO>; +defm JB : ICBr<0x72, 0x82, "jb\t$dst" , X86_COND_B>; +defm JAE : ICBr<0x73, 0x83, "jae\t$dst", X86_COND_AE>; +defm JE : ICBr<0x74, 0x84, "je\t$dst" , X86_COND_E>; +defm JNE : ICBr<0x75, 0x85, "jne\t$dst", X86_COND_NE>; +defm JBE : ICBr<0x76, 0x86, "jbe\t$dst", X86_COND_BE>; +defm JA : ICBr<0x77, 0x87, "ja\t$dst" , X86_COND_A>; +defm JS : ICBr<0x78, 0x88, "js\t$dst" , X86_COND_S>; +defm JNS : ICBr<0x79, 0x89, "jns\t$dst", X86_COND_NS>; +defm JP : ICBr<0x7A, 0x8A, "jp\t$dst" , X86_COND_P>; +defm JNP : ICBr<0x7B, 0x8B, "jnp\t$dst", X86_COND_NP>; +defm JL : ICBr<0x7C, 0x8C, "jl\t$dst" , X86_COND_L>; +defm JGE : ICBr<0x7D, 0x8D, "jge\t$dst", X86_COND_GE>; +defm JLE : ICBr<0x7E, 0x8E, "jle\t$dst", X86_COND_LE>; +defm JG : ICBr<0x7F, 0x8F, "jg\t$dst" , X86_COND_G>; + +// FIXME: What about the CX/RCX versions of this instruction? 
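The ICBr multiclass pairs each condition's short and near encodings: opc1 is the one-byte 0x70+cc opcode with a rel8, opc4 the 0F-prefixed 0x80+cc opcode with a rel32 (hence the TB modifier on the _4 defs). A toy encoder showing how a later relaxation pass can pick between the two forms (a sketch, not the backend's emitter):

#include <cstdint>
#include <vector>
std::vector<uint8_t> encodeJcc(uint8_t cc, int32_t disp) {
  if (disp >= -128 && disp <= 127)               // fits rel8: the _1 form
    return {(uint8_t)(0x70 + cc), (uint8_t)disp};
  std::vector<uint8_t> enc{0x0F, (uint8_t)(0x80 + cc)};
  for (int i = 0; i < 4; ++i)                    // little-endian rel32
    enc.push_back((uint8_t)(disp >> (8 * i)));
  return enc;                                    // the _4 form
}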
+let Uses = [ECX], isBranch = 1, isTerminator = 1 in + def JCXZ8 : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst), + "jcxz\t$dst", []>; + + // Indirect branches let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { def JMP32r : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst", @@ -639,63 +674,6 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { "ljmp{l}\t{*}$dst", []>; } -// Conditional branches -let Uses = [EFLAGS] in { -// Short conditional jumps -def JO8 : IBr<0x70, (ins brtarget8:$dst), "jo\t$dst", []>; -def JNO8 : IBr<0x71, (ins brtarget8:$dst), "jno\t$dst", []>; -def JB8 : IBr<0x72, (ins brtarget8:$dst), "jb\t$dst", []>; -def JAE8 : IBr<0x73, (ins brtarget8:$dst), "jae\t$dst", []>; -def JE8 : IBr<0x74, (ins brtarget8:$dst), "je\t$dst", []>; -def JNE8 : IBr<0x75, (ins brtarget8:$dst), "jne\t$dst", []>; -def JBE8 : IBr<0x76, (ins brtarget8:$dst), "jbe\t$dst", []>; -def JA8 : IBr<0x77, (ins brtarget8:$dst), "ja\t$dst", []>; -def JS8 : IBr<0x78, (ins brtarget8:$dst), "js\t$dst", []>; -def JNS8 : IBr<0x79, (ins brtarget8:$dst), "jns\t$dst", []>; -def JP8 : IBr<0x7A, (ins brtarget8:$dst), "jp\t$dst", []>; -def JNP8 : IBr<0x7B, (ins brtarget8:$dst), "jnp\t$dst", []>; -def JL8 : IBr<0x7C, (ins brtarget8:$dst), "jl\t$dst", []>; -def JGE8 : IBr<0x7D, (ins brtarget8:$dst), "jge\t$dst", []>; -def JLE8 : IBr<0x7E, (ins brtarget8:$dst), "jle\t$dst", []>; -def JG8 : IBr<0x7F, (ins brtarget8:$dst), "jg\t$dst", []>; - -def JCXZ8 : IBr<0xE3, (ins brtarget8:$dst), "jcxz\t$dst", []>; - -def JE : IBr<0x84, (ins brtarget:$dst), "je\t$dst", - [(X86brcond bb:$dst, X86_COND_E, EFLAGS)]>, TB; -def JNE : IBr<0x85, (ins brtarget:$dst), "jne\t$dst", - [(X86brcond bb:$dst, X86_COND_NE, EFLAGS)]>, TB; -def JL : IBr<0x8C, (ins brtarget:$dst), "jl\t$dst", - [(X86brcond bb:$dst, X86_COND_L, EFLAGS)]>, TB; -def JLE : IBr<0x8E, (ins brtarget:$dst), "jle\t$dst", - [(X86brcond bb:$dst, X86_COND_LE, EFLAGS)]>, TB; -def JG : IBr<0x8F, (ins brtarget:$dst), "jg\t$dst", - [(X86brcond bb:$dst, X86_COND_G, EFLAGS)]>, TB; -def JGE : IBr<0x8D, (ins brtarget:$dst), "jge\t$dst", - [(X86brcond bb:$dst, X86_COND_GE, EFLAGS)]>, TB; - -def JB : IBr<0x82, (ins brtarget:$dst), "jb\t$dst", - [(X86brcond bb:$dst, X86_COND_B, EFLAGS)]>, TB; -def JBE : IBr<0x86, (ins brtarget:$dst), "jbe\t$dst", - [(X86brcond bb:$dst, X86_COND_BE, EFLAGS)]>, TB; -def JA : IBr<0x87, (ins brtarget:$dst), "ja\t$dst", - [(X86brcond bb:$dst, X86_COND_A, EFLAGS)]>, TB; -def JAE : IBr<0x83, (ins brtarget:$dst), "jae\t$dst", - [(X86brcond bb:$dst, X86_COND_AE, EFLAGS)]>, TB; - -def JS : IBr<0x88, (ins brtarget:$dst), "js\t$dst", - [(X86brcond bb:$dst, X86_COND_S, EFLAGS)]>, TB; -def JNS : IBr<0x89, (ins brtarget:$dst), "jns\t$dst", - [(X86brcond bb:$dst, X86_COND_NS, EFLAGS)]>, TB; -def JP : IBr<0x8A, (ins brtarget:$dst), "jp\t$dst", - [(X86brcond bb:$dst, X86_COND_P, EFLAGS)]>, TB; -def JNP : IBr<0x8B, (ins brtarget:$dst), "jnp\t$dst", - [(X86brcond bb:$dst, X86_COND_NP, EFLAGS)]>, TB; -def JO : IBr<0x80, (ins brtarget:$dst), "jo\t$dst", - [(X86brcond bb:$dst, X86_COND_O, EFLAGS)]>, TB; -def JNO : IBr<0x81, (ins brtarget:$dst), "jno\t$dst", - [(X86brcond bb:$dst, X86_COND_NO, EFLAGS)]>, TB; -} // Uses = [EFLAGS] // Loop instructions @@ -716,7 +694,7 @@ let isCall = 1 in XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], Uses = [ESP] in { - def CALLpcrel32 : Ii32<0xE8, RawFrm, + def CALLpcrel32 : Ii32PCRel<0xE8, RawFrm, (outs), (ins 
i32imm_pcrel:$dst,variable_ops), "call\t$dst", []>; def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst, variable_ops), @@ -756,15 +734,18 @@ def TCRETURNri : I<0, Pseudo, (outs), "#TC_RETURN $dst $offset", []>; -let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in - def TAILJMPd : IBr<0xE9, (ins i32imm_pcrel:$dst), "jmp\t$dst # TAILCALL", +// FIXME: These should be pseudo instructions that are lowered when going to +// mcinst. +let isCall = 1, isBranch = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in + def TAILJMPd : Ii32<0xE9, RawFrm, (outs),(ins i32imm_pcrel:$dst,variable_ops), + "jmp\t$dst # TAILCALL", []>; let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in - def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32:$dst), + def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32:$dst, variable_ops), "jmp{l}\t{*}$dst # TAILCALL", []>; let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in - def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), + def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem:$dst, variable_ops), "jmp\t{*}$dst # TAILCALL", []>; //===----------------------------------------------------------------------===// @@ -877,7 +858,7 @@ def LEA32r : I<0x8D, MRMSrcMem, "lea{l}\t{$src|$dst}, {$dst|$src}", [(set GR32:$dst, lea32addr:$src)]>, Requires<[In32BitMode]>; -let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI] in { +let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in { def REP_MOVSB : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}", [(X86rep_movs i8)]>, REP; def REP_MOVSW : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}", @@ -886,16 +867,31 @@ def REP_MOVSD : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}", [(X86rep_movs i32)]>, REP; } -let Defs = [ECX,EDI], Uses = [AL,ECX,EDI] in +// These use the DF flag in the EFLAGS register to inc or dec EDI and ESI +let Defs = [EDI,ESI], Uses = [EDI,ESI,EFLAGS] in { +def MOVSB : I<0xA4, RawFrm, (outs), (ins), "{movsb}", []>; +def MOVSW : I<0xA5, RawFrm, (outs), (ins), "{movsw}", []>, OpSize; +def MOVSD : I<0xA5, RawFrm, (outs), (ins), "{movsl|movsd}", []>; +} + +let Defs = [ECX,EDI], Uses = [AL,ECX,EDI], isCodeGenOnly = 1 in def REP_STOSB : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}", [(X86rep_stos i8)]>, REP; -let Defs = [ECX,EDI], Uses = [AX,ECX,EDI] in +let Defs = [ECX,EDI], Uses = [AX,ECX,EDI], isCodeGenOnly = 1 in def REP_STOSW : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}", [(X86rep_stos i16)]>, REP, OpSize; -let Defs = [ECX,EDI], Uses = [EAX,ECX,EDI] in +let Defs = [ECX,EDI], Uses = [EAX,ECX,EDI], isCodeGenOnly = 1 in def REP_STOSD : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}", [(X86rep_stos i32)]>, REP; +// These use the DF flag in the EFLAGS register to inc or dec EDI and ESI +let Defs = [EDI], Uses = [AL,EDI,EFLAGS] in +def STOSB : I<0xAA, RawFrm, (outs), (ins), "{stosb}", []>; +let Defs = [EDI], Uses = [AX,EDI,EFLAGS] in +def STOSW : I<0xAB, RawFrm, (outs), (ins), "{stosw}", []>, OpSize; +let Defs = [EDI], Uses = [EAX,EDI,EFLAGS] in +def STOSD : I<0xAB, RawFrm, (outs), (ins), "{stosl|stosd}", []>; + def SCAS8 : I<0xAE, RawFrm, (outs), (ins), "scas{b}", []>; def SCAS16 : I<0xAF, RawFrm, (outs), (ins), "scas{w}", []>, OpSize; def SCAS32 : I<0xAF, RawFrm, (outs), (ins), "scas{l}", []>; @@ -908,6 +904,9 @@ let Defs = [RAX, RDX] in def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)]>, TB; +let Defs = [RAX, RCX, RDX] in +def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", []>, TB; + let isBarrier = 1, hasCtrlDep = 1 in { 
def TRAP : I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB; } @@ -996,6 +995,7 @@ def MOV32ri : Ii32<0xB8, AddRegFrm, (outs GR32:$dst), (ins i32imm:$src), "mov{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, imm:$src)]>; } + def MOV8mi : Ii8 <0xC6, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src), "mov{b}\t{$src, $dst|$dst, $src}", [(store (i8 imm:$src), addr:$dst)]>; @@ -2306,98 +2306,100 @@ let isTwoAddress = 0 in { def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src), "rcl{b}\t{1, $dst|$dst, 1}", []>; -def RCL8m1 : I<0xD0, MRM2m, (outs i8mem:$dst), (ins i8mem:$src), - "rcl{b}\t{1, $dst|$dst, 1}", []>; let Uses = [CL] in { def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src), "rcl{b}\t{%cl, $dst|$dst, CL}", []>; -def RCL8mCL : I<0xD2, MRM2m, (outs i8mem:$dst), (ins i8mem:$src), - "rcl{b}\t{%cl, $dst|$dst, CL}", []>; } def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src, i8imm:$cnt), "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCL8mi : Ii8<0xC0, MRM2m, (outs i8mem:$dst), (ins i8mem:$src, i8imm:$cnt), - "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>; def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src), "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize; -def RCL16m1 : I<0xD1, MRM2m, (outs i16mem:$dst), (ins i16mem:$src), - "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize; let Uses = [CL] in { def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src), "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; -def RCL16mCL : I<0xD3, MRM2m, (outs i16mem:$dst), (ins i16mem:$src), - "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; } def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src, i8imm:$cnt), "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; -def RCL16mi : Ii8<0xC1, MRM2m, (outs i16mem:$dst), - (ins i16mem:$src, i8imm:$cnt), - "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src), "rcl{l}\t{1, $dst|$dst, 1}", []>; -def RCL32m1 : I<0xD1, MRM2m, (outs i32mem:$dst), (ins i32mem:$src), - "rcl{l}\t{1, $dst|$dst, 1}", []>; let Uses = [CL] in { def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src), "rcl{l}\t{%cl, $dst|$dst, CL}", []>; -def RCL32mCL : I<0xD3, MRM2m, (outs i32mem:$dst), (ins i32mem:$src), - "rcl{l}\t{%cl, $dst|$dst, CL}", []>; } def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src, i8imm:$cnt), "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCL32mi : Ii8<0xC1, MRM2m, (outs i32mem:$dst), - (ins i32mem:$src, i8imm:$cnt), - "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>; def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src), "rcr{b}\t{1, $dst|$dst, 1}", []>; -def RCR8m1 : I<0xD0, MRM3m, (outs i8mem:$dst), (ins i8mem:$src), - "rcr{b}\t{1, $dst|$dst, 1}", []>; let Uses = [CL] in { def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src), "rcr{b}\t{%cl, $dst|$dst, CL}", []>; -def RCR8mCL : I<0xD2, MRM3m, (outs i8mem:$dst), (ins i8mem:$src), - "rcr{b}\t{%cl, $dst|$dst, CL}", []>; } def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src, i8imm:$cnt), "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCR8mi : Ii8<0xC0, MRM3m, (outs i8mem:$dst), (ins i8mem:$src, i8imm:$cnt), - "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>; def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src), "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize; -def RCR16m1 : I<0xD1, MRM3m, (outs i16mem:$dst), (ins i16mem:$src), - "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize; let Uses = [CL] in { def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src), "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; -def RCR16mCL : I<0xD3, MRM3m, (outs 
i16mem:$dst), (ins i16mem:$src), - "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; } def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src, i8imm:$cnt), "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; -def RCR16mi : Ii8<0xC1, MRM3m, (outs i16mem:$dst), - (ins i16mem:$src, i8imm:$cnt), - "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src), "rcr{l}\t{1, $dst|$dst, 1}", []>; -def RCR32m1 : I<0xD1, MRM3m, (outs i32mem:$dst), (ins i32mem:$src), - "rcr{l}\t{1, $dst|$dst, 1}", []>; let Uses = [CL] in { def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src), "rcr{l}\t{%cl, $dst|$dst, CL}", []>; -def RCR32mCL : I<0xD3, MRM3m, (outs i32mem:$dst), (ins i32mem:$src), - "rcr{l}\t{%cl, $dst|$dst, CL}", []>; } def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src, i8imm:$cnt), "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCR32mi : Ii8<0xC1, MRM3m, (outs i32mem:$dst), - (ins i32mem:$src, i8imm:$cnt), + +let isTwoAddress = 0 in { +def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst), + "rcl{b}\t{1, $dst|$dst, 1}", []>; +def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, i8imm:$cnt), + "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>; +def RCL16m1 : I<0xD1, MRM2m, (outs), (ins i16mem:$dst), + "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize; +def RCL16mi : Ii8<0xC1, MRM2m, (outs), (ins i16mem:$dst, i8imm:$cnt), + "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; +def RCL32m1 : I<0xD1, MRM2m, (outs), (ins i32mem:$dst), + "rcl{l}\t{1, $dst|$dst, 1}", []>; +def RCL32mi : Ii8<0xC1, MRM2m, (outs), (ins i32mem:$dst, i8imm:$cnt), + "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>; +def RCR8m1 : I<0xD0, MRM3m, (outs), (ins i8mem:$dst), + "rcr{b}\t{1, $dst|$dst, 1}", []>; +def RCR8mi : Ii8<0xC0, MRM3m, (outs), (ins i8mem:$dst, i8imm:$cnt), + "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>; +def RCR16m1 : I<0xD1, MRM3m, (outs), (ins i16mem:$dst), + "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize; +def RCR16mi : Ii8<0xC1, MRM3m, (outs), (ins i16mem:$dst, i8imm:$cnt), + "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; +def RCR32m1 : I<0xD1, MRM3m, (outs), (ins i32mem:$dst), + "rcr{l}\t{1, $dst|$dst, 1}", []>; +def RCR32mi : Ii8<0xC1, MRM3m, (outs), (ins i32mem:$dst, i8imm:$cnt), "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>; +let Uses = [CL] in { +def RCL8mCL : I<0xD2, MRM2m, (outs), (ins i8mem:$dst), + "rcl{b}\t{%cl, $dst|$dst, CL}", []>; +def RCL16mCL : I<0xD3, MRM2m, (outs), (ins i16mem:$dst), + "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; +def RCL32mCL : I<0xD3, MRM2m, (outs), (ins i32mem:$dst), + "rcl{l}\t{%cl, $dst|$dst, CL}", []>; +def RCR8mCL : I<0xD2, MRM3m, (outs), (ins i8mem:$dst), + "rcr{b}\t{%cl, $dst|$dst, CL}", []>; +def RCR16mCL : I<0xD3, MRM3m, (outs), (ins i16mem:$dst), + "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; +def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst), + "rcr{l}\t{%cl, $dst|$dst, CL}", []>; +} +} + // FIXME: provide shorter instructions when imm8 == 1 let Uses = [CL] in { def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src), @@ -3006,8 +3008,8 @@ let isTwoAddress = 0 in { def SBB32mr : I<0x19, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), "sbb{l}\t{$src2, $dst|$dst, $src2}", [(store (sube (load addr:$dst), GR32:$src2), addr:$dst)]>; - def SBB8mi : Ii32<0x80, MRM3m, (outs), (ins i8mem:$dst, i8imm:$src2), - "sbb{b}\t{$src2, $dst|$dst, $src2}", + def SBB8mi : Ii8<0x80, MRM3m, (outs), (ins i8mem:$dst, i8imm:$src2), + "sbb{b}\t{$src2, $dst|$dst, $src2}", [(store (sube (loadi8 addr:$dst), imm:$src2), addr:$dst)]>; def SBB16mi : 
Ii16<0x81, MRM3m, (outs), (ins i16mem:$dst, i16imm:$src2), "sbb{w}\t{$src2, $dst|$dst, $src2}", @@ -3234,17 +3236,18 @@ def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", []>; // AH = flags let Uses = [EFLAGS] in { // Use sbb to materialize carry bit. - let Defs = [EFLAGS], isCodeGenOnly = 1 in { -def SETB_C8r : I<0x18, MRMInitReg, (outs GR8:$dst), (ins), - "sbb{b}\t$dst, $dst", +// FIXME: These are pseudo ops that should be replaced with Pat<> patterns. +// However, Pat<> can't replicate the destination reg into the inputs of the +// result. +// FIXME: Change these to have encoding Pseudo when X86MCCodeEmitter replaces +// X86CodeEmitter. +def SETB_C8r : I<0x18, MRMInitReg, (outs GR8:$dst), (ins), "", [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>; -def SETB_C16r : I<0x19, MRMInitReg, (outs GR16:$dst), (ins), - "sbb{w}\t$dst, $dst", +def SETB_C16r : I<0x19, MRMInitReg, (outs GR16:$dst), (ins), "", [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>, OpSize; -def SETB_C32r : I<0x19, MRMInitReg, (outs GR32:$dst), (ins), - "sbb{l}\t$dst, $dst", +def SETB_C32r : I<0x19, MRMInitReg, (outs GR32:$dst), (ins), "", [(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>; } // isCodeGenOnly @@ -3681,7 +3684,7 @@ def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), "movz{wl|x}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (zextloadi32i16 addr:$src))]>, TB; -// These are the same as the regular regular MOVZX32rr8 and MOVZX32rm8 +// These are the same as the regular MOVZX32rr8 and MOVZX32rm8 // except that they use GR32_NOREX for the output operand register class // instead of GR32. This allows them to operate on h registers on x86-64. def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg, @@ -3716,10 +3719,10 @@ let neverHasSideEffects = 1 in { // Alias instructions that map movr0 to xor. // FIXME: remove when we can teach regalloc that xor reg, reg is ok. +// FIXME: Set encoding to pseudo. let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1 in { -def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), - "xor{b}\t$dst, $dst", +def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "", [(set GR8:$dst, 0)]>; // We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller @@ -3731,8 +3734,8 @@ def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins), "", [(set GR16:$dst, 0)]>, OpSize; -def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), - "xor{l}\t$dst, $dst", +// FIXME: Set encoding to pseudo. 
+def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "", [(set GR32:$dst, 0)]>; } @@ -4077,7 +4080,7 @@ def LSL32rm : I<0x03, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), def LSL32rr : I<0x03, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB; -def INVLPG : I<0x01, RawFrm, (outs), (ins), "invlpg", []>, TB; +def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr", []>, TB; def STRr : I<0x00, MRM1r, (outs GR16:$dst), (ins), "str{w}\t{$dst}", []>, TB; @@ -4155,6 +4158,26 @@ def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src), def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src), "lldt{w}\t$src", []>, TB; +// Lock instruction prefix +def LOCK_PREFIX : I<0xF0, RawFrm, (outs), (ins), "lock", []>; + +// Repeat string operation instruction prefixes +// These use the DF flag in the EFLAGS register to inc or dec ECX +let Defs = [ECX], Uses = [ECX,EFLAGS] in { +// Repeat (used with INS, OUTS, MOVS, LODS and STOS) +def REP_PREFIX : I<0xF3, RawFrm, (outs), (ins), "rep", []>; +// Repeat while not equal (used with CMPS and SCAS) +def REPNE_PREFIX : I<0xF2, RawFrm, (outs), (ins), "repne", []>; +} + +// Segment override instruction prefixes +def CS_PREFIX : I<0x2E, RawFrm, (outs), (ins), "cs", []>; +def SS_PREFIX : I<0x36, RawFrm, (outs), (ins), "ss", []>; +def DS_PREFIX : I<0x3E, RawFrm, (outs), (ins), "ds", []>; +def ES_PREFIX : I<0x26, RawFrm, (outs), (ins), "es", []>; +def FS_PREFIX : I<0x64, RawFrm, (outs), (ins), "fs", []>; +def GS_PREFIX : I<0x65, RawFrm, (outs), (ins), "gs", []>; + // String manipulation instructions def LODSB : I<0xAC, RawFrm, (outs), (ins), "lodsb", []>; @@ -4219,17 +4242,17 @@ def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", []>, TB; // VMX instructions // 66 0F 38 80 -def INVEPT : I<0x38, RawFrm, (outs), (ins), "invept", []>, OpSize, TB; +def INVEPT : I<0x80, RawFrm, (outs), (ins), "invept", []>, OpSize, T8; // 66 0F 38 81 -def INVVPID : I<0x38, RawFrm, (outs), (ins), "invvpid", []>, OpSize, TB; +def INVVPID : I<0x81, RawFrm, (outs), (ins), "invvpid", []>, OpSize, T8; // 0F 01 C1 -def VMCALL : I<0x01, RawFrm, (outs), (ins), "vmcall", []>, TB; +def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB; def VMCLEARm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs), "vmclear\t$vmcs", []>, OpSize, TB; // 0F 01 C2 -def VMLAUNCH : I<0x01, RawFrm, (outs), (ins), "vmlaunch", []>, TB; +def VMLAUNCH : I<0x01, MRM_C2, (outs), (ins), "vmlaunch", []>, TB; // 0F 01 C3 -def VMRESUME : I<0x01, RawFrm, (outs), (ins), "vmresume", []>, TB; +def VMRESUME : I<0x01, MRM_C3, (outs), (ins), "vmresume", []>, TB; def VMPTRLDm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs), "vmptrld\t$vmcs", []>, TB; def VMPTRSTm : I<0xC7, MRM7m, (outs i64mem:$vmcs), (ins), @@ -4251,7 +4274,7 @@ def VMWRITE32rm : I<0x79, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), def VMWRITE32rr : I<0x79, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB; // 0F 01 C4 -def VMXOFF : I<0x01, RawFrm, (outs), (ins), "vmxoff", []>, OpSize; +def VMXOFF : I<0x01, MRM_C4, (outs), (ins), "vmxoff", []>, TB; def VMXON : I<0xC7, MRM6m, (outs), (ins i64mem:$vmxon), "vmxon\t{$vmxon}", []>, XD; @@ -5181,6 +5204,12 @@ include "X86InstrFPStack.td" include "X86Instr64bit.td" //===----------------------------------------------------------------------===// +// SIMD support (SSE, MMX and AVX) +//===----------------------------------------------------------------------===// + +include "X86InstrFragmentsSIMD.td" + 
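The VMX defs above (and RDTSCP earlier) move from RawFrm to the new MRM_C1/MRM_C2/MRM_C3/MRM_C4/MRM_F9 forms, that is, opcodes whose ModRM position holds a fixed literal byte rather than an encoded operand. A minimal sketch of the byte sequence such a form denotes, in plain C++ rather than the emitter's real interfaces (the helper name is made up for illustration):

#include <cstdint>
#include <vector>

// Hypothetical stand-in for the emitter: append an MRM_Cx-form
// instruction, i.e. the 0x0F two-byte-opcode escape (the TB prefix),
// the base opcode, then a fixed byte where ModRM would normally sit.
static void EmitFixedModRM(std::vector<uint8_t> &Out,
                           uint8_t BaseOpcode, uint8_t FixedByte) {
  Out.push_back(0x0F);
  Out.push_back(BaseOpcode);
  Out.push_back(FixedByte);
}

int main() {
  std::vector<uint8_t> Buf;
  EmitFixedModRM(Buf, 0x01, 0xC1); // vmcall = 0F 01 C1
  EmitFixedModRM(Buf, 0x01, 0xF9); // rdtscp = 0F 01 F9
  return Buf.size() == 6 ? 0 : 1;
}

The MRM_C1 through MRM_F9 cases of the new X86MCCodeEmitter further down emit exactly this trailing byte pair.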
+//===----------------------------------------------------------------------===// // XMM Floating point support (requires SSE / SSE2) //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index fc40c9a..89f020c 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -14,56 +14,6 @@ //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// MMX Pattern Fragments -//===----------------------------------------------------------------------===// - -def load_mmx : PatFrag<(ops node:$ptr), (v1i64 (load node:$ptr))>; - -def bc_v8i8 : PatFrag<(ops node:$in), (v8i8 (bitconvert node:$in))>; -def bc_v4i16 : PatFrag<(ops node:$in), (v4i16 (bitconvert node:$in))>; -def bc_v2i32 : PatFrag<(ops node:$in), (v2i32 (bitconvert node:$in))>; -def bc_v1i64 : PatFrag<(ops node:$in), (v1i64 (bitconvert node:$in))>; - -//===----------------------------------------------------------------------===// -// MMX Masks -//===----------------------------------------------------------------------===// - -// MMX_SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to -// PSHUFW imm. -def MMX_SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{ - return getI8Imm(X86::getShuffleSHUFImmediate(N)); -}]>; - -// Patterns for: vector_shuffle v1, v2, <2, 6, 3, 7, ...> -def mmx_unpckh : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N)); -}]>; - -// Patterns for: vector_shuffle v1, v2, <0, 4, 2, 5, ...> -def mmx_unpckl : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N)); -}]>; - -// Patterns for: vector_shuffle v1, <undef>, <0, 0, 1, 1, ...> -def mmx_unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKH_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); -}]>; - -// Patterns for: vector_shuffle v1, <undef>, <2, 2, 3, 3, ...> -def mmx_unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKL_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); -}]>; - -def mmx_pshufw : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N)); -}], MMX_SHUFFLE_get_shuf_imm>; - -//===----------------------------------------------------------------------===// // MMX Multiclasses //===----------------------------------------------------------------------===// @@ -501,6 +451,20 @@ let Constraints = "$src1 = $dst" in { (iPTR imm:$src3))))]>; } +// MMX to XMM for vector types +def MMX_X86movq2dq : SDNode<"X86ISD::MOVQ2DQ", SDTypeProfile<1, 1, + [SDTCisVT<0, v2i64>, SDTCisVT<1, v1i64>]>>; + +def : Pat<(v2i64 (MMX_X86movq2dq VR64:$src)), + (v2i64 (MMX_MOVQ2DQrr VR64:$src))>; + +def : Pat<(v2i64 (MMX_X86movq2dq (load_mmx addr:$src))), + (v2i64 (MOVQI2PQIrm addr:$src))>; + +def : Pat<(v2i64 (MMX_X86movq2dq (v1i64 (bitconvert + (v2i32 (scalar_to_vector (loadi32 addr:$src))))))), + (v2i64 (MOVDI2PDIrm addr:$src))>; + // Mask creation def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR64:$src), "pmovmskb\t{$src, $dst|$dst, $src}", @@ -522,11 +486,10 @@ def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask), // Alias 
instructions that map zero vector to pxor. let isReMaterializable = 1, isCodeGenOnly = 1 in { - def MMX_V_SET0 : MMXI<0xEF, MRMInitReg, (outs VR64:$dst), (ins), - "pxor\t$dst, $dst", + // FIXME: Change encoding to pseudo. + def MMX_V_SET0 : MMXI<0xEF, MRMInitReg, (outs VR64:$dst), (ins), "", [(set VR64:$dst, (v2i32 immAllZerosV))]>; - def MMX_V_SETALLONES : MMXI<0x76, MRMInitReg, (outs VR64:$dst), (ins), - "pcmpeqd\t$dst, $dst", + def MMX_V_SETALLONES : MMXI<0x76, MRMInitReg, (outs VR64:$dst), (ins), "", [(set VR64:$dst, (v2i32 immAllOnesV))]>; } diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 94b9b55..9b2140f 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -505,9 +505,10 @@ def Int_COMISSrm: PSI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), // Alias instructions that map fld0 to pxor for sse. let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1, canFoldAsLoad = 1 in -def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), - "pxor\t$dst, $dst", [(set FR32:$dst, fp32imm0)]>, - Requires<[HasSSE1]>, TB, OpSize; + // FIXME: Set encoding to pseudo! +def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "", + [(set FR32:$dst, fp32imm0)]>, + Requires<[HasSSE1]>, TB, OpSize; // Alias instruction to do FR32 reg-to-reg copy using movaps. Upper bits are // disregarded. @@ -761,6 +762,9 @@ let Constraints = "$src1 = $dst" in { } // Constraints = "$src1 = $dst" +def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))), + (MOVHPSrm VR128:$src1, addr:$src2)>; + def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movlps\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)), @@ -1025,10 +1029,10 @@ def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst), // Alias instructions that map zero vector to pxor / xorp* for sse. // We set canFoldAsLoad because this can be converted to a constant-pool // load of an all-zeros value if folding it would be beneficial. +// FIXME: Change encoding to pseudo! let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, isCodeGenOnly = 1 in -def V_SET0 : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), - "xorps\t$dst, $dst", +def V_SET0 : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "", [(set VR128:$dst, (v4i32 immAllZerosV))]>; let Predicates = [HasSSE1] in { @@ -1269,8 +1273,8 @@ def Int_COMISDrm: PDI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), // Alias instructions that map fld0 to pxor for sse. let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1, canFoldAsLoad = 1 in -def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), - "pxor\t$dst, $dst", [(set FR64:$dst, fpimm0)]>, +def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "", + [(set FR64:$dst, fpimm0)]>, Requires<[HasSSE2]>, TB, OpSize; // Alias instruction to do FR64 reg-to-reg copy using movapd. 
Upper bits are @@ -2311,9 +2315,9 @@ def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src), TB, Requires<[HasSSE2]>; // Load, store, and memory fence -def LFENCE : I<0xAE, MRM5r, (outs), (ins), +def LFENCE : I<0xAE, MRM_E8, (outs), (ins), "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>; -def MFENCE : I<0xAE, MRM6r, (outs), (ins), +def MFENCE : I<0xAE, MRM_F0, (outs), (ins), "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>; //TODO: custom lower this so as to never even generate the noop @@ -2329,8 +2333,8 @@ def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss), // load of an all-ones value if folding it would be beneficial. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, isCodeGenOnly = 1 in - def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), - "pcmpeqd\t$dst, $dst", + // FIXME: Change encoding to pseudo. + def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "", [(set VR128:$dst, (v4i32 immAllOnesV))]>; // FR64 to 128-bit vector conversion. @@ -2612,9 +2616,9 @@ let Constraints = "$src1 = $dst" in { } // Thread synchronization -def MONITOR : I<0x01, MRM1r, (outs), (ins), "monitor", +def MONITOR : I<0x01, MRM_C8, (outs), (ins), "monitor", [(int_x86_sse3_monitor EAX, ECX, EDX)]>,TB, Requires<[HasSSE3]>; -def MWAIT : I<0x01, MRM1r, (outs), (ins), "mwait", +def MWAIT : I<0x01, MRM_C9, (outs), (ins), "mwait", [(int_x86_sse3_mwait ECX, EAX)]>, TB, Requires<[HasSSE3]>; // vector_shuffle v1, <undef> <1, 1, 3, 3> @@ -3746,7 +3750,8 @@ def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2), def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movntdqa\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>; + [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>, + OpSize; //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp index f363903..d297d24 100644 --- a/lib/Target/X86/X86JITInfo.cpp +++ b/lib/Target/X86/X86JITInfo.cpp @@ -297,6 +297,7 @@ extern "C" { push edx push ecx and esp, -16 + sub esp, 16 mov eax, dword ptr [ebp+4] mov dword ptr [esp+4], eax mov dword ptr [esp], ebp diff --git a/lib/Target/X86/X86MCAsmInfo.cpp b/lib/Target/X86/X86MCAsmInfo.cpp index 1738d49..91c0fbb 100644 --- a/lib/Target/X86/X86MCAsmInfo.cpp +++ b/lib/Target/X86/X86MCAsmInfo.cpp @@ -55,9 +55,6 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &Triple) { if (!is64Bit) Data64bitsDirective = 0; // we can't emit a 64-bit unit - // Leopard and above support aligned common symbols. 
- COMMDirectiveTakesAlignment = Triple.getDarwinMajorNumber() >= 9; - CommentString = "##"; PCSymbol = "."; @@ -75,7 +72,6 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &Triple) { PrivateGlobalPrefix = ".L"; WeakRefDirective = "\t.weak\t"; - SetDirective = "\t.set\t"; PCSymbol = "."; // Set up DWARF directives @@ -98,27 +94,4 @@ MCSection *X86ELFMCAsmInfo::getNonexecutableStackSection(MCContext &Ctx) const { X86MCAsmInfoCOFF::X86MCAsmInfoCOFF(const Triple &Triple) { AsmTransCBE = x86_asm_table; AssemblerDialect = AsmWriterFlavor; -} - - -X86WinMCAsmInfo::X86WinMCAsmInfo(const Triple &Triple) { - AsmTransCBE = x86_asm_table; - AssemblerDialect = AsmWriterFlavor; - - GlobalPrefix = "_"; - CommentString = ";"; - - PrivateGlobalPrefix = "$"; - AlignDirective = "\tALIGN\t"; - ZeroDirective = "\tdb\t"; - AsciiDirective = "\tdb\t"; - AscizDirective = 0; - Data8bitsDirective = "\tdb\t"; - Data16bitsDirective = "\tdw\t"; - Data32bitsDirective = "\tdd\t"; - Data64bitsDirective = "\tdq\t"; - HasDotTypeDotSizeDirective = false; - HasSingleParameterDotFile = false; - - AlignmentIsInBytes = true; -} +}
\ No newline at end of file diff --git a/lib/Target/X86/X86MCAsmInfo.h b/lib/Target/X86/X86MCAsmInfo.h index ca227b7..69716bf 100644 --- a/lib/Target/X86/X86MCAsmInfo.h +++ b/lib/Target/X86/X86MCAsmInfo.h @@ -33,11 +33,6 @@ namespace llvm { struct X86MCAsmInfoCOFF : public MCAsmInfoCOFF { explicit X86MCAsmInfoCOFF(const Triple &Triple); }; - - struct X86WinMCAsmInfo : public MCAsmInfo { - explicit X86WinMCAsmInfo(const Triple &Triple); - }; - } // namespace llvm #endif diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp new file mode 100644 index 0000000..3f18696 --- /dev/null +++ b/lib/Target/X86/X86MCCodeEmitter.cpp @@ -0,0 +1,645 @@ +//===-- X86/X86MCCodeEmitter.cpp - Convert X86 code to machine code -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the X86MCCodeEmitter class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "x86-emitter" +#include "X86.h" +#include "X86InstrInfo.h" +#include "X86FixupKinds.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +namespace { +class X86MCCodeEmitter : public MCCodeEmitter { + X86MCCodeEmitter(const X86MCCodeEmitter &); // DO NOT IMPLEMENT + void operator=(const X86MCCodeEmitter &); // DO NOT IMPLEMENT + const TargetMachine &TM; + const TargetInstrInfo &TII; + MCContext &Ctx; + bool Is64BitMode; +public: + X86MCCodeEmitter(TargetMachine &tm, MCContext &ctx, bool is64Bit) + : TM(tm), TII(*TM.getInstrInfo()), Ctx(ctx) { + Is64BitMode = is64Bit; + } + + ~X86MCCodeEmitter() {} + + unsigned getNumFixupKinds() const { + return 3; + } + + const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const { + const static MCFixupKindInfo Infos[] = { + { "reloc_pcrel_4byte", 0, 4 * 8 }, + { "reloc_pcrel_1byte", 0, 1 * 8 }, + { "reloc_riprel_4byte", 0, 4 * 8 } + }; + + if (Kind < FirstTargetFixupKind) + return MCCodeEmitter::getFixupKindInfo(Kind); + + assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && + "Invalid kind!"); + return Infos[Kind - FirstTargetFixupKind]; + } + + static unsigned GetX86RegNum(const MCOperand &MO) { + return X86RegisterInfo::getX86RegNum(MO.getReg()); + } + + void EmitByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) const { + OS << (char)C; + ++CurByte; + } + + void EmitConstant(uint64_t Val, unsigned Size, unsigned &CurByte, + raw_ostream &OS) const { + // Output the constant in little endian byte order. 
+ for (unsigned i = 0; i != Size; ++i) { + EmitByte(Val & 255, CurByte, OS); + Val >>= 8; + } + } + + void EmitImmediate(const MCOperand &Disp, + unsigned ImmSize, MCFixupKind FixupKind, + unsigned &CurByte, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups, + int ImmOffset = 0) const; + + inline static unsigned char ModRMByte(unsigned Mod, unsigned RegOpcode, + unsigned RM) { + assert(Mod < 4 && RegOpcode < 8 && RM < 8 && "ModRM Fields out of range!"); + return RM | (RegOpcode << 3) | (Mod << 6); + } + + void EmitRegModRMByte(const MCOperand &ModRMReg, unsigned RegOpcodeFld, + unsigned &CurByte, raw_ostream &OS) const { + EmitByte(ModRMByte(3, RegOpcodeFld, GetX86RegNum(ModRMReg)), CurByte, OS); + } + + void EmitSIBByte(unsigned SS, unsigned Index, unsigned Base, + unsigned &CurByte, raw_ostream &OS) const { + // SIB byte is in the same format as the ModRMByte. + EmitByte(ModRMByte(SS, Index, Base), CurByte, OS); + } + + + void EmitMemModRMByte(const MCInst &MI, unsigned Op, + unsigned RegOpcodeField, + unsigned TSFlags, unsigned &CurByte, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups) const; + + void EncodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups) const; + +}; + +} // end anonymous namespace + + +MCCodeEmitter *llvm::createX86_32MCCodeEmitter(const Target &, + TargetMachine &TM, + MCContext &Ctx) { + return new X86MCCodeEmitter(TM, Ctx, false); +} + +MCCodeEmitter *llvm::createX86_64MCCodeEmitter(const Target &, + TargetMachine &TM, + MCContext &Ctx) { + return new X86MCCodeEmitter(TM, Ctx, true); +} + + +/// isDisp8 - Return true if this signed displacement fits in an 8-bit +/// sign-extended field. +static bool isDisp8(int Value) { + return Value == (signed char)Value; +} + +/// getImmFixupKind - Return the appropriate fixup kind to use for an immediate +/// in an instruction with the specified TSFlags. +static MCFixupKind getImmFixupKind(unsigned TSFlags) { + unsigned Size = X86II::getSizeOfImm(TSFlags); + bool isPCRel = X86II::isImmPCRel(TSFlags); + + switch (Size) { + default: assert(0 && "Unknown immediate size"); + case 1: return isPCRel ? MCFixupKind(X86::reloc_pcrel_1byte) : FK_Data_1; + case 4: return isPCRel ? MCFixupKind(X86::reloc_pcrel_4byte) : FK_Data_4; + case 2: assert(!isPCRel); return FK_Data_2; + case 8: assert(!isPCRel); return FK_Data_8; + } +} + + +void X86MCCodeEmitter:: +EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind, + unsigned &CurByte, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups, int ImmOffset) const { + // If this is a simple integer displacement that doesn't require a relocation, + // emit it now. + if (DispOp.isImm()) { + // FIXME: is this right for pc-rel encoding?? Probably need to emit this as + // a fixup if so. + EmitConstant(DispOp.getImm()+ImmOffset, Size, CurByte, OS); + return; + } + + // If we have an immoffset, add it to the expression. + const MCExpr *Expr = DispOp.getExpr(); + + // If the fixup is pc-relative, we need to bias the value to be relative to + // the start of the field, not the end of the field. + if (FixupKind == MCFixupKind(X86::reloc_pcrel_4byte) || + FixupKind == MCFixupKind(X86::reloc_riprel_4byte)) + ImmOffset -= 4; + if (FixupKind == MCFixupKind(X86::reloc_pcrel_1byte)) + ImmOffset -= 1; + + if (ImmOffset) + Expr = MCBinaryExpr::CreateAdd(Expr, MCConstantExpr::Create(ImmOffset, Ctx), + Ctx); + + // Emit a symbolic constant as a fixup and Size zero bytes. 
+ Fixups.push_back(MCFixup::Create(CurByte, Expr, FixupKind)); + EmitConstant(0, Size, CurByte, OS); +} + + +void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, + unsigned RegOpcodeField, + unsigned TSFlags, unsigned &CurByte, + raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups) const{ + const MCOperand &Disp = MI.getOperand(Op+3); + const MCOperand &Base = MI.getOperand(Op); + const MCOperand &Scale = MI.getOperand(Op+1); + const MCOperand &IndexReg = MI.getOperand(Op+2); + unsigned BaseReg = Base.getReg(); + + // Handle %rip relative addressing. + if (BaseReg == X86::RIP) { // [disp32+RIP] in X86-64 mode + assert(IndexReg.getReg() == 0 && Is64BitMode && + "Invalid rip-relative address"); + EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS); + + // rip-relative addressing is actually relative to the *next* instruction. + // Since an immediate can follow the mod/rm byte for an instruction, this + // means that we need to bias the immediate field of the instruction with + // the size of the immediate field. If we have this case, add it into the + // expression to emit. + int ImmSize = X86II::hasImm(TSFlags) ? X86II::getSizeOfImm(TSFlags) : 0; + + EmitImmediate(Disp, 4, MCFixupKind(X86::reloc_riprel_4byte), + CurByte, OS, Fixups, -ImmSize); + return; + } + + unsigned BaseRegNo = BaseReg ? GetX86RegNum(Base) : -1U; + + // Determine whether a SIB byte is needed. + // If no BaseReg, issue a RIP relative instruction only if the MCE can + // resolve addresses on-the-fly, otherwise use SIB (Intel Manual 2A, table + // 2-7) and absolute references. + + if (// The SIB byte must be used if there is an index register. + IndexReg.getReg() == 0 && + // The SIB byte must be used if the base is ESP/RSP/R12, all of which + // encode to an R/M value of 4, which indicates that a SIB byte is + // present. + BaseRegNo != N86::ESP && + // If there is no base register and we're in 64-bit mode, we need a SIB + // byte to emit an addr that is just 'disp32' (the non-RIP relative form). + (!Is64BitMode || BaseReg != 0)) { + + if (BaseReg == 0) { // [disp32] in X86-32 mode + EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS); + EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups); + return; + } + + // If the base is not EBP/ESP and there is no displacement, use simple + // indirect register encoding, this handles addresses like [EAX]. The + // encoding for [EBP] with no displacement means [disp32] so we handle it + // by emitting a displacement of 0 below. + if (Disp.isImm() && Disp.getImm() == 0 && BaseRegNo != N86::EBP) { + EmitByte(ModRMByte(0, RegOpcodeField, BaseRegNo), CurByte, OS); + return; + } + + // Otherwise, if the displacement fits in a byte, encode as [REG+disp8]. + if (Disp.isImm() && isDisp8(Disp.getImm())) { + EmitByte(ModRMByte(1, RegOpcodeField, BaseRegNo), CurByte, OS); + EmitImmediate(Disp, 1, FK_Data_1, CurByte, OS, Fixups); + return; + } + + // Otherwise, emit the most general non-SIB encoding: [REG+disp32] + EmitByte(ModRMByte(2, RegOpcodeField, BaseRegNo), CurByte, OS); + EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups); + return; + } + + // We need a SIB byte, so start by outputting the ModR/M byte first + assert(IndexReg.getReg() != X86::ESP && + IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!"); + + bool ForceDisp32 = false; + bool ForceDisp8 = false; + if (BaseReg == 0) { + // If there is no base register, we emit the special case SIB byte with + // MOD=0, BASE=5, to JUST get the index, scale, and displacement. 
+ EmitByte(ModRMByte(0, RegOpcodeField, 4), CurByte, OS); + ForceDisp32 = true; + } else if (!Disp.isImm()) { + // Emit the normal disp32 encoding. + EmitByte(ModRMByte(2, RegOpcodeField, 4), CurByte, OS); + ForceDisp32 = true; + } else if (Disp.getImm() == 0 && BaseReg != X86::EBP) { + // Emit no displacement ModR/M byte + EmitByte(ModRMByte(0, RegOpcodeField, 4), CurByte, OS); + } else if (isDisp8(Disp.getImm())) { + // Emit the disp8 encoding. + EmitByte(ModRMByte(1, RegOpcodeField, 4), CurByte, OS); + ForceDisp8 = true; // Make sure to force 8 bit disp if Base=EBP + } else { + // Emit the normal disp32 encoding. + EmitByte(ModRMByte(2, RegOpcodeField, 4), CurByte, OS); + } + + // Calculate what the SS field value should be... + static const unsigned SSTable[] = { ~0, 0, 1, ~0, 2, ~0, ~0, ~0, 3 }; + unsigned SS = SSTable[Scale.getImm()]; + + if (BaseReg == 0) { + // Handle the SIB byte for the case where there is no base, see Intel + // Manual 2A, table 2-7. The displacement has already been output. + unsigned IndexRegNo; + if (IndexReg.getReg()) + IndexRegNo = GetX86RegNum(IndexReg); + else // Examples: [ESP+1*<noreg>+4] or [scaled idx]+disp32 (MOD=0,BASE=5) + IndexRegNo = 4; + EmitSIBByte(SS, IndexRegNo, 5, CurByte, OS); + } else { + unsigned IndexRegNo; + if (IndexReg.getReg()) + IndexRegNo = GetX86RegNum(IndexReg); + else + IndexRegNo = 4; // For example [ESP+1*<noreg>+4] + EmitSIBByte(SS, IndexRegNo, GetX86RegNum(Base), CurByte, OS); + } + + // Do we need to output a displacement? + if (ForceDisp8) + EmitImmediate(Disp, 1, FK_Data_1, CurByte, OS, Fixups); + else if (ForceDisp32 || Disp.getImm() != 0) + EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups); +} + +/// DetermineREXPrefix - Determine if the MCInst has to be encoded with an X86-64 +/// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand +/// size, and 3) use of X86-64 extended registers. +static unsigned DetermineREXPrefix(const MCInst &MI, unsigned TSFlags, + const TargetInstrDesc &Desc) { + // Pseudo instructions never have a rex byte. + if ((TSFlags & X86II::FormMask) == X86II::Pseudo) + return 0; + + unsigned REX = 0; + if (TSFlags & X86II::REX_W) + REX |= 1 << 3; + + if (MI.getNumOperands() == 0) return REX; + + unsigned NumOps = MI.getNumOperands(); + // FIXME: MCInst should explicitize the two-addrness. + bool isTwoAddr = NumOps > 1 && + Desc.getOperandConstraint(1, TOI::TIED_TO) != -1; + + // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix. + unsigned i = isTwoAddr ? 1 : 0; + for (; i != NumOps; ++i) { + const MCOperand &MO = MI.getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (!X86InstrInfo::isX86_64NonExtLowByteReg(Reg)) continue; + // FIXME: The caller of DetermineREXPrefix slaps this prefix onto anything + // that returns non-zero. + REX |= 0x40; + break; + } + + switch (TSFlags & X86II::FormMask) { + case X86II::MRMInitReg: assert(0 && "FIXME: Remove this!"); + case X86II::MRMSrcReg: + if (MI.getOperand(0).isReg() && + X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0).getReg())) + REX |= 1 << 2; + i = isTwoAddr ? 2 : 1; + for (; i != NumOps; ++i) { + const MCOperand &MO = MI.getOperand(i); + if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) + REX |= 1 << 0; + } + break; + case X86II::MRMSrcMem: { + if (MI.getOperand(0).isReg() && + X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0).getReg())) + REX |= 1 << 2; + unsigned Bit = 0; + i = isTwoAddr ? 
2 : 1; + for (; i != NumOps; ++i) { + const MCOperand &MO = MI.getOperand(i); + if (MO.isReg()) { + if (X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) + REX |= 1 << Bit; + Bit++; + } + } + break; + } + case X86II::MRM0m: case X86II::MRM1m: + case X86II::MRM2m: case X86II::MRM3m: + case X86II::MRM4m: case X86II::MRM5m: + case X86II::MRM6m: case X86II::MRM7m: + case X86II::MRMDestMem: { + unsigned e = (isTwoAddr ? X86AddrNumOperands+1 : X86AddrNumOperands); + i = isTwoAddr ? 1 : 0; + if (NumOps > e && MI.getOperand(e).isReg() && + X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(e).getReg())) + REX |= 1 << 2; + unsigned Bit = 0; + for (; i != e; ++i) { + const MCOperand &MO = MI.getOperand(i); + if (MO.isReg()) { + if (X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) + REX |= 1 << Bit; + Bit++; + } + } + break; + } + default: + if (MI.getOperand(0).isReg() && + X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0).getReg())) + REX |= 1 << 0; + i = isTwoAddr ? 2 : 1; + for (unsigned e = NumOps; i != e; ++i) { + const MCOperand &MO = MI.getOperand(i); + if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) + REX |= 1 << 2; + } + break; + } + return REX; +} + +void X86MCCodeEmitter:: +EncodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups) const { + unsigned Opcode = MI.getOpcode(); + const TargetInstrDesc &Desc = TII.get(Opcode); + unsigned TSFlags = Desc.TSFlags; + + // Keep track of the current byte being emitted. + unsigned CurByte = 0; + + // FIXME: We should emit the prefixes in exactly the same order as GAS does, + // in order to provide diffability. + + // Emit the lock opcode prefix as needed. + if (TSFlags & X86II::LOCK) + EmitByte(0xF0, CurByte, OS); + + // Emit segment override opcode prefix as needed. + switch (TSFlags & X86II::SegOvrMask) { + default: assert(0 && "Invalid segment!"); + case 0: break; // No segment override! + case X86II::FS: + EmitByte(0x64, CurByte, OS); + break; + case X86II::GS: + EmitByte(0x65, CurByte, OS); + break; + } + + // Emit the repeat opcode prefix as needed. + if ((TSFlags & X86II::Op0Mask) == X86II::REP) + EmitByte(0xF3, CurByte, OS); + + // Emit the operand size opcode prefix as needed. + if (TSFlags & X86II::OpSize) + EmitByte(0x66, CurByte, OS); + + // Emit the address size opcode prefix as needed. + if (TSFlags & X86II::AdSize) + EmitByte(0x67, CurByte, OS); + + bool Need0FPrefix = false; + switch (TSFlags & X86II::Op0Mask) { + default: assert(0 && "Invalid prefix!"); + case 0: break; // No prefix! + case X86II::REP: break; // already handled. + case X86II::TB: // Two-byte opcode prefix + case X86II::T8: // 0F 38 + case X86II::TA: // 0F 3A + Need0FPrefix = true; + break; + case X86II::TF: // F2 0F 38 + EmitByte(0xF2, CurByte, OS); + Need0FPrefix = true; + break; + case X86II::XS: // F3 0F + EmitByte(0xF3, CurByte, OS); + Need0FPrefix = true; + break; + case X86II::XD: // F2 0F + EmitByte(0xF2, CurByte, OS); + Need0FPrefix = true; + break; + case X86II::D8: EmitByte(0xD8, CurByte, OS); break; + case X86II::D9: EmitByte(0xD9, CurByte, OS); break; + case X86II::DA: EmitByte(0xDA, CurByte, OS); break; + case X86II::DB: EmitByte(0xDB, CurByte, OS); break; + case X86II::DC: EmitByte(0xDC, CurByte, OS); break; + case X86II::DD: EmitByte(0xDD, CurByte, OS); break; + case X86II::DE: EmitByte(0xDE, CurByte, OS); break; + case X86II::DF: EmitByte(0xDF, CurByte, OS); break; + } + + // Handle REX prefix. + // FIXME: Can this come before F2 etc to simplify emission? 
+ if (Is64BitMode) { + if (unsigned REX = DetermineREXPrefix(MI, TSFlags, Desc)) + EmitByte(0x40 | REX, CurByte, OS); + } + + // 0x0F escape code must be emitted just before the opcode. + if (Need0FPrefix) + EmitByte(0x0F, CurByte, OS); + + // FIXME: Pull this up into previous switch if REX can be moved earlier. + switch (TSFlags & X86II::Op0Mask) { + case X86II::TF: // F2 0F 38 + case X86II::T8: // 0F 38 + EmitByte(0x38, CurByte, OS); + break; + case X86II::TA: // 0F 3A + EmitByte(0x3A, CurByte, OS); + break; + } + + // If this is a two-address instruction, skip one of the register operands. + unsigned NumOps = Desc.getNumOperands(); + unsigned CurOp = 0; + if (NumOps > 1 && Desc.getOperandConstraint(1, TOI::TIED_TO) != -1) + ++CurOp; + else if (NumOps > 2 && Desc.getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0) + // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32 + --NumOps; + + unsigned char BaseOpcode = X86II::getBaseOpcodeFor(TSFlags); + switch (TSFlags & X86II::FormMask) { + case X86II::MRMInitReg: + assert(0 && "FIXME: Remove this form when the JIT moves to MCCodeEmitter!"); + default: errs() << "FORM: " << (TSFlags & X86II::FormMask) << "\n"; + assert(0 && "Unknown FormMask value in X86MCCodeEmitter!"); + case X86II::Pseudo: return; // Pseudo instructions encode to nothing. + case X86II::RawFrm: + EmitByte(BaseOpcode, CurByte, OS); + break; + + case X86II::AddRegFrm: + EmitByte(BaseOpcode + GetX86RegNum(MI.getOperand(CurOp++)), CurByte, OS); + break; + + case X86II::MRMDestReg: + EmitByte(BaseOpcode, CurByte, OS); + EmitRegModRMByte(MI.getOperand(CurOp), + GetX86RegNum(MI.getOperand(CurOp+1)), CurByte, OS); + CurOp += 2; + break; + + case X86II::MRMDestMem: + EmitByte(BaseOpcode, CurByte, OS); + EmitMemModRMByte(MI, CurOp, + GetX86RegNum(MI.getOperand(CurOp + X86AddrNumOperands)), + TSFlags, CurByte, OS, Fixups); + CurOp += X86AddrNumOperands + 1; + break; + + case X86II::MRMSrcReg: + EmitByte(BaseOpcode, CurByte, OS); + EmitRegModRMByte(MI.getOperand(CurOp+1), GetX86RegNum(MI.getOperand(CurOp)), + CurByte, OS); + CurOp += 2; + break; + + case X86II::MRMSrcMem: { + EmitByte(BaseOpcode, CurByte, OS); + + // FIXME: Maybe lea should have its own form? This is a horrible hack. 
+ int AddrOperands; + if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r || + Opcode == X86::LEA16r || Opcode == X86::LEA32r) + AddrOperands = X86AddrNumOperands - 1; // No segment register + else + AddrOperands = X86AddrNumOperands; + + EmitMemModRMByte(MI, CurOp+1, GetX86RegNum(MI.getOperand(CurOp)), + TSFlags, CurByte, OS, Fixups); + CurOp += AddrOperands + 1; + break; + } + + case X86II::MRM0r: case X86II::MRM1r: + case X86II::MRM2r: case X86II::MRM3r: + case X86II::MRM4r: case X86II::MRM5r: + case X86II::MRM6r: case X86II::MRM7r: + EmitByte(BaseOpcode, CurByte, OS); + EmitRegModRMByte(MI.getOperand(CurOp++), + (TSFlags & X86II::FormMask)-X86II::MRM0r, + CurByte, OS); + break; + case X86II::MRM0m: case X86II::MRM1m: + case X86II::MRM2m: case X86II::MRM3m: + case X86II::MRM4m: case X86II::MRM5m: + case X86II::MRM6m: case X86II::MRM7m: + EmitByte(BaseOpcode, CurByte, OS); + EmitMemModRMByte(MI, CurOp, (TSFlags & X86II::FormMask)-X86II::MRM0m, + TSFlags, CurByte, OS, Fixups); + CurOp += X86AddrNumOperands; + break; + case X86II::MRM_C1: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xC1, CurByte, OS); + break; + case X86II::MRM_C2: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xC2, CurByte, OS); + break; + case X86II::MRM_C3: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xC3, CurByte, OS); + break; + case X86II::MRM_C4: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xC4, CurByte, OS); + break; + case X86II::MRM_C8: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xC8, CurByte, OS); + break; + case X86II::MRM_C9: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xC9, CurByte, OS); + break; + case X86II::MRM_E8: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xE8, CurByte, OS); + break; + case X86II::MRM_F0: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xF0, CurByte, OS); + break; + case X86II::MRM_F8: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xF8, CurByte, OS); + break; + case X86II::MRM_F9: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xF9, CurByte, OS); + break; + } + + // If there is a remaining operand, it must be a trailing immediate. Emit it + // according to the right size for the instruction. + if (CurOp != NumOps) + EmitImmediate(MI.getOperand(CurOp++), + X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags), + CurByte, OS, Fixups); + +#ifndef NDEBUG + // FIXME: Verify. + if (/*!Desc.isVariadic() &&*/ CurOp != NumOps) { + errs() << "Cannot encode all operands of: "; + MI.dump(); + errs() << '\n'; + abort(); + } +#endif +} diff --git a/lib/Target/X86/X86MCTargetExpr.cpp b/lib/Target/X86/X86MCTargetExpr.cpp new file mode 100644 index 0000000..17b4fe8 --- /dev/null +++ b/lib/Target/X86/X86MCTargetExpr.cpp @@ -0,0 +1,48 @@ +//===- X86MCTargetExpr.cpp - X86 Target Specific MCExpr Implementation ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "X86MCTargetExpr.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +X86MCTargetExpr *X86MCTargetExpr::Create(const MCSymbol *Sym, VariantKind K, + MCContext &Ctx) { + return new (Ctx) X86MCTargetExpr(Sym, K); +} + +void X86MCTargetExpr::PrintImpl(raw_ostream &OS) const { + OS << *Sym; + + switch (Kind) { + case Invalid: OS << "@<invalid>"; break; + case GOT: OS << "@GOT"; break; + case GOTOFF: OS << "@GOTOFF"; break; + case GOTPCREL: OS << "@GOTPCREL"; break; + case GOTTPOFF: OS << "@GOTTPOFF"; break; + case INDNTPOFF: OS << "@INDNTPOFF"; break; + case NTPOFF: OS << "@NTPOFF"; break; + case PLT: OS << "@PLT"; break; + case TLSGD: OS << "@TLSGD"; break; + case TPOFF: OS << "@TPOFF"; break; + } +} + +bool X86MCTargetExpr::EvaluateAsRelocatableImpl(MCValue &Res) const { + // FIXME: I don't know if this is right, it followed MCSymbolRefExpr. + + // Evaluate recursively if this is a variable. + if (Sym->isVariable()) + return Sym->getValue()->EvaluateAsRelocatable(Res); + + Res = MCValue::get(Sym, 0, 0); + return true; +} diff --git a/lib/Target/X86/X86MCTargetExpr.h b/lib/Target/X86/X86MCTargetExpr.h new file mode 100644 index 0000000..7de8a5c --- /dev/null +++ b/lib/Target/X86/X86MCTargetExpr.h @@ -0,0 +1,49 @@ +//===- X86MCTargetExpr.h - X86 Target Specific MCExpr -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef X86_MCTARGETEXPR_H +#define X86_MCTARGETEXPR_H + +#include "llvm/MC/MCExpr.h" + +namespace llvm { + +/// X86MCTargetExpr - This class represents symbol variants, like foo@GOT. +class X86MCTargetExpr : public MCTargetExpr { +public: + enum VariantKind { + Invalid, + GOT, + GOTOFF, + GOTPCREL, + GOTTPOFF, + INDNTPOFF, + NTPOFF, + PLT, + TLSGD, + TPOFF + }; +private: + /// Sym - The symbol being referenced. + const MCSymbol * const Sym; + /// Kind - The modifier. + const VariantKind Kind; + + X86MCTargetExpr(const MCSymbol *S, VariantKind K) : Sym(S), Kind(K) {} +public: + static X86MCTargetExpr *Create(const MCSymbol *Sym, VariantKind K, + MCContext &Ctx); + + void PrintImpl(raw_ostream &OS) const; + bool EvaluateAsRelocatableImpl(MCValue &Res) const; +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h index fafcf7e..4b2529b 100644 --- a/lib/Target/X86/X86MachineFunctionInfo.h +++ b/lib/Target/X86/X86MachineFunctionInfo.h @@ -18,12 +18,6 @@ namespace llvm { -enum NameDecorationStyle { - None, - StdCall, - FastCall -}; - /// X86MachineFunctionInfo - This class is derived from MachineFunction and /// contains private X86 target-specific information for each MachineFunction. class X86MachineFunctionInfo : public MachineFunctionInfo { @@ -41,16 +35,11 @@ class X86MachineFunctionInfo : public MachineFunctionInfo { /// Used on windows platform for stdcall & fastcall name decoration unsigned BytesToPopOnReturn; - /// DecorationStyle - If the function requires additional name decoration, - /// DecorationStyle holds the right way to do so. - NameDecorationStyle DecorationStyle; - /// ReturnAddrIndex - FrameIndex for return slot. 
int ReturnAddrIndex; - /// TailCallReturnAddrDelta - Delta the ReturnAddr stack slot is moved - /// Used for creating an area before the register spill area on the stack - /// the returnaddr can be savely move to this area + /// TailCallReturnAddrDelta - The number of bytes by which the return address + /// stack slot is moved as a result of tail call optimization. int TailCallReturnAddrDelta; /// SRetReturnReg - Some subtargets require that sret lowering includes @@ -67,7 +56,6 @@ public: X86MachineFunctionInfo() : ForceFramePointer(false), CalleeSavedFrameSize(0), BytesToPopOnReturn(0), - DecorationStyle(None), ReturnAddrIndex(0), TailCallReturnAddrDelta(0), SRetReturnReg(0), @@ -77,7+65,6 @@ public: : ForceFramePointer(false), CalleeSavedFrameSize(0), BytesToPopOnReturn(0), - DecorationStyle(None), ReturnAddrIndex(0), TailCallReturnAddrDelta(0), SRetReturnReg(0), @@ -92,9 +79,6 @@ public: unsigned getBytesToPopOnReturn() const { return BytesToPopOnReturn; } void setBytesToPopOnReturn (unsigned bytes) { BytesToPopOnReturn = bytes;} - NameDecorationStyle getDecorationStyle() const { return DecorationStyle; } - void setDecorationStyle(NameDecorationStyle style) { DecorationStyle = style;} - int getRAIndex() const { return ReturnAddrIndex; } void setRAIndex(int Index) { ReturnAddrIndex = Index; } diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index f959a2d..8524236 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -473,9 +473,9 @@ bool X86RegisterInfo::hasReservedSpillSlot(MachineFunction &MF, unsigned Reg, } int -X86RegisterInfo::getFrameIndexOffset(MachineFunction &MF, int FI) const { +X86RegisterInfo::getFrameIndexOffset(const MachineFunction &MF, int FI) const { const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo(); - MachineFrameInfo *MFI = MF.getFrameInfo(); + const MachineFrameInfo *MFI = MF.getFrameInfo(); int Offset = MFI->getObjectOffset(FI) - TFI.getOffsetOfLocalArea(); uint64_t StackSize = MFI->getStackSize(); @@ -485,7 +485,7 @@ X86RegisterInfo::getFrameIndexOffset(MachineFunction &MF, int FI) const { Offset += SlotSize; } else { unsigned Align = MFI->getObjectAlignment(FI); - assert( (-(Offset + StackSize)) % Align == 0); + assert((-(Offset + StackSize)) % Align == 0); Align = 0; return Offset + StackSize; } @@ -498,7 +498,7 @@ X86RegisterInfo::getFrameIndexOffset(MachineFunction &MF, int FI) const { Offset += SlotSize; // Skip the RETADDR move area - X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); + const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); if (TailCallReturnAddrDelta < 0) Offset -= TailCallReturnAddrDelta; @@ -627,10 +627,6 @@ X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const { MachineFrameInfo *MFI = MF.getFrameInfo(); - // Calculate and set max stack object alignment early, so we can decide - // whether we will need stack realignment (and thus FP). - MFI->calculateMaxStackAlignment(); - X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); @@ -1242,13 +1238,19 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF, } // Jump to label or value in register. - if (RetOpcode == X86::TCRETURNdi|| RetOpcode == X86::TCRETURNdi64) + if (RetOpcode == X86::TCRETURNdi|| RetOpcode == X86::TCRETURNdi64) { BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPd)). 
- addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); - else if (RetOpcode== X86::TCRETURNri64) + addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), + JumpTarget.getTargetFlags()); + } else if (RetOpcode == X86::TCRETURNri64) { BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64), JumpTarget.getReg()); - else + } else { BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr), JumpTarget.getReg()); + } + + MachineInstr *NewMI = prior(MBBI); + for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i) + NewMI->addOperand(MBBI->getOperand(i)); // Delete the pseudo instruction TCRETURN. MBB.erase(MBBI); diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h index dec3fba..8fb5e92 100644 --- a/lib/Target/X86/X86RegisterInfo.h +++ b/lib/Target/X86/X86RegisterInfo.h @@ -156,7 +156,7 @@ public: // Debug information queries. unsigned getRARegister() const; unsigned getFrameRegister(const MachineFunction &MF) const; - int getFrameIndexOffset(MachineFunction &MF, int FI) const; + int getFrameIndexOffset(const MachineFunction &MF, int FI) const; void getInitialFrameState(std::vector<MachineMove> &Moves) const; // Exception handling queries. diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index 6db0cc3..1559bf7 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -512,20 +512,17 @@ def GR64_ABCD : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RBX]> { let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD]; } -// GR8_NOREX, GR16_NOREX, GR32_NOREX, GR64_NOREX - Subclasses of -// GR8, GR16, GR32, and GR64 which contain only the first 8 GPRs. -// On x86-64, GR64_NOREX, GR32_NOREX and GR16_NOREX are the classes -// of registers which do not by themselves require a REX prefix. +// GR8_NOREX - GR8 registers which do not require a REX prefix. def GR8_NOREX : RegisterClass<"X86", [i8], 8, - [AL, CL, DL, AH, CH, DH, BL, BH, - SIL, DIL, BPL, SPL]> { + [AL, CL, DL, AH, CH, DH, BL, BH]> { let MethodProtos = [{ iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; }]; let MethodBodies = [{ + // In 64-bit mode, it's not safe to blindly allocate H registers. static const unsigned X86_GR8_NOREX_AO_64[] = { - X86::AL, X86::CL, X86::DL, X86::SIL, X86::DIL, X86::BL, X86::BPL + X86::AL, X86::CL, X86::DL, X86::BL }; GR8_NOREXClass::iterator @@ -541,21 +538,15 @@ def GR8_NOREX : RegisterClass<"X86", [i8], 8, GR8_NOREXClass::iterator GR8_NOREXClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); - // Does the function dedicate RBP / EBP to being a frame ptr? - if (!Subtarget.is64Bit()) - // In 32-mode, none of the 8-bit registers aliases EBP or ESP. - return begin() + 8; - else if (RI->hasFP(MF)) - // If so, don't allocate SPL or BPL. - return array_endof(X86_GR8_NOREX_AO_64) - 1; - else - // If not, just don't allocate SPL. + if (Subtarget.is64Bit()) return array_endof(X86_GR8_NOREX_AO_64); + else + return end(); } }]; } +// GR16_NOREX - GR16 registers which do not require a REX prefix. 
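The GR8_NOREX change above narrows the 64-bit allocation order to AL, CL, DL, BL: once SIL, DIL, BPL and SPL are dropped from the class, the only other candidates are the H registers, and an H register cannot be encoded in an instruction that also carries a REX prefix. A standalone sketch of the mode-dependent order (register names are illustrative, not LLVM's real enum values):

  #include <cstddef>
  #include <utility>

  // Illustrative 8-bit register names.
  enum Reg8 { AL, CL, DL, BL, AH, CH, DH, BH };

  // Mirrors the allocation_order logic above: in 64-bit mode only the
  // low byte registers are handed out, since blindly allocating an H
  // register can collide with a REX prefix elsewhere in the encoding.
  static std::pair<const Reg8 *, std::size_t> gr8NoRexOrder(bool Is64Bit) {
    static const Reg8 Order64[] = { AL, CL, DL, BL };
    static const Reg8 Order32[] = { AL, CL, DL, AH, CH, DH, BL, BH };
    if (Is64Bit)
      return std::make_pair(Order64, sizeof(Order64) / sizeof(Order64[0]));
    return std::make_pair(Order32, sizeof(Order32) / sizeof(Order32[0]));
  }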
def GR16_NOREX : RegisterClass<"X86", [i16], 16, [AX, CX, DX, SI, DI, BX, BP, SP]> { let SubRegClassList = [GR8_NOREX, GR8_NOREX]; diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 2039be7..adef5bc 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -53,9 +53,9 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { if (GV->hasDLLImportLinkage()) return X86II::MO_DLLIMPORT; - // GV with ghost linkage (in JIT lazy compilation mode) do not require an + // Materializable GVs (in JIT lazy compilation mode) do not require an // extra load from stub. - bool isDecl = GV->isDeclaration() && !GV->hasNotBeenReadFromBitcode(); + bool isDecl = GV->isDeclaration() && !GV->isMaterializable(); // X86-64 in PIC mode. if (isPICStyleRIPRel()) { diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 618dd10..5e05c2f 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -175,7 +175,7 @@ public: else if (isTargetDarwin()) p = "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-n8:16:32"; else if (isTargetMingw() || isTargetWindows()) - p = "e-p:32:32-f64:64:64-i64:64:64-f80:128:128-n8:16:32"; + p = "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-n8:16:32"; else p = "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-n8:16:32"; diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 731c3ab..7802f98 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -30,9 +30,8 @@ static const MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { case Triple::MinGW32: case Triple::MinGW64: case Triple::Cygwin: - return new X86MCAsmInfoCOFF(TheTriple); case Triple::Win32: - return new X86WinMCAsmInfo(TheTriple); + return new X86MCAsmInfoCOFF(TheTriple); default: return new X86ELFMCAsmInfo(TheTriple); } @@ -48,8 +47,10 @@ extern "C" void LLVMInitializeX86Target() { RegisterAsmInfoFn B(TheX86_64Target, createMCAsmInfo); // Register the code emitter. - TargetRegistry::RegisterCodeEmitter(TheX86_32Target, createX86MCCodeEmitter); - TargetRegistry::RegisterCodeEmitter(TheX86_64Target, createX86MCCodeEmitter); + TargetRegistry::RegisterCodeEmitter(TheX86_32Target, + createX86_32MCCodeEmitter); + TargetRegistry::RegisterCodeEmitter(TheX86_64Target, + createX86_64MCCodeEmitter); } @@ -145,10 +146,6 @@ bool X86TargetMachine::addInstSelector(PassManagerBase &PM, // Install an instruction selector. PM.add(createX86ISelDag(*this, OptLevel)); - // If we're using Fast-ISel, clean up the mess. - if (EnableFastISel) - PM.add(createDeadMachineInstructionElimPass()); - // Install a pass to insert x87 FP_REG_KILL instructions, as needed. PM.add(createX87FPRegKillInserterPass()); @@ -168,22 +165,6 @@ bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM, bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - MachineCodeEmitter &MCE) { - // FIXME: Move this to TargetJITInfo! - // On Darwin, do not override 64-bit setting made in X86TargetMachine(). - if (DefRelocModel == Reloc::Default && - (!Subtarget.isTargetDarwin() || !Subtarget.is64Bit())) { - setRelocationModel(Reloc::Static); - Subtarget.setPICStyle(PICStyles::None); - } - - PM.add(createX86CodeEmitterPass(*this, MCE)); - - return false; -} - -bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, JITCodeEmitter &JCE) { // FIXME: Move this to TargetJITInfo! 
// On Darwin, do not override 64-bit setting made in X86TargetMachine(). @@ -199,34 +180,6 @@ bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM, return false; } -bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - ObjectCodeEmitter &OCE) { - PM.add(createX86ObjectCodeEmitterPass(*this, OCE)); - return false; -} - -bool X86TargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - MachineCodeEmitter &MCE) { - PM.add(createX86CodeEmitterPass(*this, MCE)); - return false; -} - -bool X86TargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - JITCodeEmitter &JCE) { - PM.add(createX86JITCodeEmitterPass(*this, JCE)); - return false; -} - -bool X86TargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - ObjectCodeEmitter &OCE) { - PM.add(createX86ObjectCodeEmitterPass(*this, OCE)); - return false; -} - void X86TargetMachine::setCodeModelForStatic() { if (getCodeModel() != CodeModel::Default) return; @@ -246,32 +199,3 @@ void X86TargetMachine::setCodeModelForJIT() { else setCodeModel(CodeModel::Small); } - -/// getLSDAEncoding - Returns the LSDA pointer encoding. The choices are 4-byte, -/// 8-byte, and target default. The CIE is hard-coded to indicate that the LSDA -/// pointer in the FDE section is an "sdata4", and should be encoded as a 4-byte -/// pointer by default. However, some systems may require a different size due -/// to bugs or other conditions. We will default to a 4-byte encoding unless the -/// system tells us otherwise. -/// -/// The issue is when the CIE says their is an LSDA. That mandates that every -/// FDE have an LSDA slot. But if the function does not need an LSDA. There -/// needs to be some way to signify there is none. The LSDA is encoded as -/// pc-rel. But you don't look for some magic value after adding the pc. You -/// have to look for a zero before adding the pc. The problem is that the size -/// of the zero to look for depends on the encoding. The unwinder bug in SL is -/// that it always checks for a pointer-size zero. So on x86_64 it looks for 8 -/// bytes of zero. If you have an LSDA, it works fine since the 8-bytes are -/// non-zero so it goes ahead and then reads the value based on the encoding. -/// But if you use sdata4 and there is no LSDA, then the test for zero gives a -/// false negative and the unwinder thinks there is an LSDA. -/// -/// FIXME: This call-back isn't good! We should be using the correct encoding -/// regardless of the system. However, there are some systems which have bugs -/// that prevent this from occuring. -DwarfLSDAEncoding::Encoding X86TargetMachine::getLSDAEncoding() const { - if (Subtarget.isTargetDarwin() && Subtarget.getDarwinVers() != 10) - return DwarfLSDAEncoding::Default; - - return DwarfLSDAEncoding::EightByte; -} diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index d05bebd..2bb5454 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -62,37 +62,12 @@ public: return Subtarget.isTargetELF() ? &ELFWriterInfo : 0; } - /// getLSDAEncoding - Returns the LSDA pointer encoding. The choices are - /// 4-byte, 8-byte, and target default. The CIE is hard-coded to indicate that - /// the LSDA pointer in the FDE section is an "sdata4", and should be encoded - /// as a 4-byte pointer by default. However, some systems may require a - /// different size due to bugs or other conditions. 
We will default to a - /// 4-byte encoding unless the system tells us otherwise. - /// - /// FIXME: This call-back isn't good! We should be using the correct encoding - /// regardless of the system. However, there are some systems which have bugs - /// that prevent this from occuring. - virtual DwarfLSDAEncoding::Encoding getLSDAEncoding() const; - // Set up the pass pipeline. virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - MachineCodeEmitter &MCE); - virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, JITCodeEmitter &JCE); - virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - ObjectCodeEmitter &OCE); - virtual bool addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - MachineCodeEmitter &MCE); - virtual bool addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - JITCodeEmitter &JCE); - virtual bool addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - ObjectCodeEmitter &OCE); }; /// X86_32TargetMachine - X86 32-bit target machine. diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp index 41ad153..d1ee3fc 100644 --- a/lib/Target/X86/X86TargetObjectFile.cpp +++ b/lib/Target/X86/X86TargetObjectFile.cpp @@ -7,60 +7,176 @@ // //===----------------------------------------------------------------------===// +#include "X86MCTargetExpr.h" #include "X86TargetObjectFile.h" +#include "X86TargetMachine.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCExpr.h" #include "llvm/Target/Mangler.h" #include "llvm/ADT/SmallString.h" +#include "llvm/Support/Dwarf.h" using namespace llvm; +using namespace dwarf; const MCExpr *X8632_MachoTargetObjectFile:: getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, - MachineModuleInfo *MMI, - bool &IsIndirect, bool &IsPCRel) const { + MachineModuleInfo *MMI, unsigned Encoding) const { // The mach-o version of this method defaults to returning a stub reference. - IsIndirect = true; - IsPCRel = false; - - - MachineModuleInfoMachO &MachOMMI = - MMI->getObjFileInfo<MachineModuleInfoMachO>(); - - // FIXME: Use GetSymbolWithGlobalValueBase. - SmallString<128> Name; - Mang->getNameWithPrefix(Name, GV, true); - Name += "$non_lazy_ptr"; - - // Add information about the stub reference to MachOMMI so that the stub gets - // emitted by the asmprinter. - MCSymbol *Sym = getContext().GetOrCreateSymbol(Name.str()); - const MCSymbol *&StubSym = MachOMMI.getGVStubEntry(Sym); - if (StubSym == 0) { - Name.clear(); - Mang->getNameWithPrefix(Name, GV, false); - StubSym = getContext().GetOrCreateSymbol(Name.str()); + + if (Encoding & DW_EH_PE_indirect) { + MachineModuleInfoMachO &MachOMMI = + MMI->getObjFileInfo<MachineModuleInfoMachO>(); + + SmallString<128> Name; + Mang->getNameWithPrefix(Name, GV, true); + Name += "$non_lazy_ptr"; + + // Add information about the stub reference to MachOMMI so that the stub + // gets emitted by the asmprinter. 
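The Mach-O path above asks for a "$non_lazy_ptr" stub and registers it with MachineModuleInfo so the AsmPrinter later materializes the indirection; the hunk picks the actual MCSymbol up just below. A rough standalone model of that get-or-create bookkeeping (a std::map stands in for the MMI stub table; the function name is hypothetical):

  #include <map>
  #include <string>

  // Stand-in for MachineModuleInfoMachO's GV stub table: stub symbol
  // name -> the symbol the stub should ultimately resolve to.
  static std::map<std::string, std::string> GVStubs;

  // Get-or-create the "$non_lazy_ptr" stub for a mangled global name.
  // The first use registers the target, so the printer can later emit
  // one indirect-pointer entry per map slot.
  std::string getNonLazyPtrStub(const std::string &MangledName) {
    std::string StubName = MangledName + "$non_lazy_ptr";
    std::string &Target = GVStubs[StubName];   // empty if newly inserted
    if (Target.empty())
      Target = MangledName;
    return StubName;
  }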
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name.str()); + MCSymbol *&StubSym = MachOMMI.getGVStubEntry(Sym); + if (StubSym == 0) { + Name.clear(); + Mang->getNameWithPrefix(Name, GV, false); + StubSym = getContext().GetOrCreateSymbol(Name.str()); + } + + return TargetLoweringObjectFile:: + getSymbolForDwarfReference(Sym, MMI, + Encoding & ~dwarf::DW_EH_PE_indirect); } - - return MCSymbolRefExpr::Create(Sym, getContext()); + + return TargetLoweringObjectFileMachO:: + getSymbolForDwarfGlobalReference(GV, Mang, MMI, Encoding); } const MCExpr *X8664_MachoTargetObjectFile:: getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, - MachineModuleInfo *MMI, - bool &IsIndirect, bool &IsPCRel) const { - + MachineModuleInfo *MMI, unsigned Encoding) const { + // On Darwin/X86-64, we can reference dwarf symbols with foo@GOTPCREL+4, which // is an indirect pc-relative reference. - IsIndirect = true; - IsPCRel = true; - - SmallString<128> Name; - Mang->getNameWithPrefix(Name, GV, false); - Name += "@GOTPCREL"; - const MCExpr *Res = - MCSymbolRefExpr::Create(Name.str(), getContext()); - const MCExpr *Four = MCConstantExpr::Create(4, getContext()); - return MCBinaryExpr::CreateAdd(Res, Four, getContext()); + if (Encoding & (DW_EH_PE_indirect | DW_EH_PE_pcrel)) { + SmallString<128> Name; + Mang->getNameWithPrefix(Name, GV, false); + const MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); + const MCExpr *Res = + X86MCTargetExpr::Create(Sym, X86MCTargetExpr::GOTPCREL, getContext()); + const MCExpr *Four = MCConstantExpr::Create(4, getContext()); + return MCBinaryExpr::CreateAdd(Res, Four, getContext()); + } + + return TargetLoweringObjectFileMachO:: + getSymbolForDwarfGlobalReference(GV, Mang, MMI, Encoding); } +unsigned X8632_ELFTargetObjectFile::getPersonalityEncoding() const { + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4; + else + return DW_EH_PE_absptr; +} + +unsigned X8632_ELFTargetObjectFile::getLSDAEncoding() const { + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_pcrel | DW_EH_PE_sdata4; + else + return DW_EH_PE_absptr; +} + +unsigned X8632_ELFTargetObjectFile::getFDEEncoding() const { + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_pcrel | DW_EH_PE_sdata4; + else + return DW_EH_PE_absptr; +} + +unsigned X8632_ELFTargetObjectFile::getTTypeEncoding() const { + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4; + else + return DW_EH_PE_absptr; +} + +unsigned X8664_ELFTargetObjectFile::getPersonalityEncoding() const { + CodeModel::Model Model = TM.getCodeModel(); + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_indirect | DW_EH_PE_pcrel | (Model == CodeModel::Small || + Model == CodeModel::Medium ? + DW_EH_PE_sdata4 : DW_EH_PE_sdata8); + + if (Model == CodeModel::Small || Model == CodeModel::Medium) + return DW_EH_PE_udata4; + + return DW_EH_PE_absptr; +} + +unsigned X8664_ELFTargetObjectFile::getLSDAEncoding() const { + CodeModel::Model Model = TM.getCodeModel(); + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_pcrel | (Model == CodeModel::Small ? 
+ DW_EH_PE_sdata4 : DW_EH_PE_sdata8); + + if (Model == CodeModel::Small) + return DW_EH_PE_udata4; + + return DW_EH_PE_absptr; +} + +unsigned X8664_ELFTargetObjectFile::getFDEEncoding() const { + CodeModel::Model Model = TM.getCodeModel(); + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_pcrel | (Model == CodeModel::Small || + Model == CodeModel::Medium ? + DW_EH_PE_sdata4 : DW_EH_PE_sdata8); + + if (Model == CodeModel::Small || Model == CodeModel::Medium) + return DW_EH_PE_udata4; + + return DW_EH_PE_absptr; +} + +unsigned X8664_ELFTargetObjectFile::getTTypeEncoding() const { + CodeModel::Model Model = TM.getCodeModel(); + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_indirect | DW_EH_PE_pcrel | (Model == CodeModel::Small || + Model == CodeModel::Medium ? + DW_EH_PE_sdata4 : DW_EH_PE_sdata8); + + if (Model == CodeModel::Small) + return DW_EH_PE_udata4; + + return DW_EH_PE_absptr; +} + +unsigned X8632_MachoTargetObjectFile::getPersonalityEncoding() const { + return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4; +} + +unsigned X8632_MachoTargetObjectFile::getLSDAEncoding() const { + return DW_EH_PE_pcrel | DW_EH_PE_sdata4; +} + +unsigned X8632_MachoTargetObjectFile::getFDEEncoding() const { + return DW_EH_PE_pcrel | DW_EH_PE_sdata4; +} + +unsigned X8632_MachoTargetObjectFile::getTTypeEncoding() const { + return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4; +} + +unsigned X8664_MachoTargetObjectFile::getPersonalityEncoding() const { + return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4; +} + +unsigned X8664_MachoTargetObjectFile::getLSDAEncoding() const { + return DW_EH_PE_pcrel | DW_EH_PE_sdata4; +} + +unsigned X8664_MachoTargetObjectFile::getFDEEncoding() const { + return DW_EH_PE_pcrel | DW_EH_PE_sdata4; +} + +unsigned X8664_MachoTargetObjectFile::getTTypeEncoding() const { + return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4; +} diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h index 377a93b..0fff194 100644 --- a/lib/Target/X86/X86TargetObjectFile.h +++ b/lib/Target/X86/X86TargetObjectFile.h @@ -10,21 +10,27 @@ #ifndef LLVM_TARGET_X86_TARGETOBJECTFILE_H #define LLVM_TARGET_X86_TARGETOBJECTFILE_H +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetLoweringObjectFile.h" namespace llvm { - + class X86TargetMachine; + /// X8632_MachoTargetObjectFile - This TLOF implementation is used for /// Darwin/x86-32. class X8632_MachoTargetObjectFile : public TargetLoweringObjectFileMachO { public: - + virtual const MCExpr * getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, - MachineModuleInfo *MMI, - bool &IsIndirect, bool &IsPCRel) const; + MachineModuleInfo *MMI, unsigned Encoding) const; + virtual unsigned getPersonalityEncoding() const; + virtual unsigned getLSDAEncoding() const; + virtual unsigned getFDEEncoding() const; + virtual unsigned getTTypeEncoding() const; }; - + /// X8664_MachoTargetObjectFile - This TLOF implementation is used for /// Darwin/x86-64. 
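The get*Encoding overrides above all follow one recipe: PIC code wants pc-relative signed data, with the width picked by whether the code model can exceed plus or minus 2GB, and non-PIC code falls back to absolute pointers or small unsigned offsets. A compilable mirror of the FDE case, with the DW_EH_PE_* values copied from llvm/Support/Dwarf.h (the free function itself is illustrative):

  // Values copied from llvm/Support/Dwarf.h.
  enum {
    DW_EH_PE_absptr = 0x00, DW_EH_PE_udata4 = 0x03,
    DW_EH_PE_sdata4 = 0x0b, DW_EH_PE_sdata8 = 0x0c,
    DW_EH_PE_pcrel  = 0x10
  };

  enum RelocModel { RM_Static, RM_PIC };
  enum CodeModel  { CM_Small, CM_Medium, CM_Large };

  // Mirrors X8664_ELFTargetObjectFile::getFDEEncoding above: PIC uses
  // pc-relative signed offsets, 4 bytes unless the code model allows
  // the image to span more than +/-2GB; static code uses absolute
  // pointers or small unsigned data.
  unsigned fdeEncoding(RelocModel RM, CodeModel CM) {
    if (RM == RM_PIC)
      return DW_EH_PE_pcrel | (CM == CM_Small || CM == CM_Medium
                                   ? DW_EH_PE_sdata4 : DW_EH_PE_sdata8);
    if (CM == CM_Small || CM == CM_Medium)
      return DW_EH_PE_udata4;
    return DW_EH_PE_absptr;
  }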
class X8664_MachoTargetObjectFile : public TargetLoweringObjectFileMachO { @@ -32,9 +38,35 @@ namespace llvm { virtual const MCExpr * getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, - MachineModuleInfo *MMI, - bool &IsIndirect, bool &IsPCRel) const; + MachineModuleInfo *MMI, unsigned Encoding) const; + virtual unsigned getPersonalityEncoding() const; + virtual unsigned getLSDAEncoding() const; + virtual unsigned getFDEEncoding() const; + virtual unsigned getTTypeEncoding() const; + }; + + class X8632_ELFTargetObjectFile : public TargetLoweringObjectFileELF { + const X86TargetMachine &TM; + public: + X8632_ELFTargetObjectFile(const X86TargetMachine &tm) + :TM(tm) { }; + virtual unsigned getPersonalityEncoding() const; + virtual unsigned getLSDAEncoding() const; + virtual unsigned getFDEEncoding() const; + virtual unsigned getTTypeEncoding() const; }; + + class X8664_ELFTargetObjectFile : public TargetLoweringObjectFileELF { + const X86TargetMachine &TM; + public: + X8664_ELFTargetObjectFile(const X86TargetMachine &tm) + :TM(tm) { }; + virtual unsigned getPersonalityEncoding() const; + virtual unsigned getLSDAEncoding() const; + virtual unsigned getFDEEncoding() const; + virtual unsigned getTTypeEncoding() const; + }; + } // end namespace llvm #endif diff --git a/lib/Target/XCore/AsmPrinter/Makefile b/lib/Target/XCore/AsmPrinter/Makefile index f0e883e..82dc1df 100644 --- a/lib/Target/XCore/AsmPrinter/Makefile +++ b/lib/Target/XCore/AsmPrinter/Makefile @@ -9,7 +9,6 @@ LEVEL = ../../../.. LIBRARYNAME = LLVMXCoreAsmPrinter -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' XCore target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp index 40d7160..d18f55d 100644 --- a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp +++ b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp @@ -32,7 +32,6 @@ #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetRegistry.h" -#include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" @@ -42,8 +41,6 @@ #include <cctype> using namespace llvm; -STATISTIC(EmittedInsts, "Number of machine instrs printed"); - static cl::opt<unsigned> MaxThreads("xcore-max-threads", cl::Optional, cl::desc("Maximum number of threads (for emulation thread-local storage)"), cl::Hidden, @@ -55,8 +52,9 @@ namespace { const XCoreSubtarget &Subtarget; public: explicit XCoreAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, - const MCAsmInfo *T, bool V) - : AsmPrinter(O, TM, T, V), + MCContext &Ctx, MCStreamer &Streamer, + const MCAsmInfo *T) + : AsmPrinter(O, TM, Ctx, Streamer, T), Subtarget(TM.getSubtarget<XCoreSubtarget>()) {} virtual const char *getPassName() const { @@ -74,13 +72,13 @@ namespace { virtual void EmitGlobalVariable(const GlobalVariable *GV); void emitFunctionStart(MachineFunction &MF); - void emitFunctionEnd(MachineFunction &MF); void printInstruction(const MachineInstr *MI); // autogenerated. 
static const char *getRegisterName(unsigned RegNo); - void printMachineInstruction(const MachineInstr *MI); - bool runOnMachineFunction(MachineFunction &F); + bool runOnMachineFunction(MachineFunction &MF); + void EmitInstruction(const MachineInstr *MI); + void EmitFunctionBodyEnd(); void getAnalysisUsage(AnalysisUsage &AU) const { AsmPrinter::getAnalysisUsage(AU); @@ -106,7 +104,7 @@ void XCoreAsmPrinter::emitArrayBound(const MCSymbol *Sym, cast<PointerType>(GV->getType())->getElementType())) { O << MAI->getGlobalDirective() << *Sym; O << ".globound" << "\n"; - O << MAI->getSetDirective() << *Sym; + O << "\t.set\t" << *Sym; O << ".globound" << "," << ATy->getNumElements() << "\n"; if (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage()) { // TODO Use COMDAT groups for LinkOnceLinkage @@ -150,8 +148,6 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { case GlobalValue::PrivateLinkage: case GlobalValue::LinkerPrivateLinkage: break; - case GlobalValue::GhostLinkage: - llvm_unreachable("Should not have any unmaterialized functions!"); case GlobalValue::DLLImportLinkage: llvm_unreachable("DLLImport linkage is not supported by this target!"); case GlobalValue::DLLExportLinkage: @@ -222,26 +218,22 @@ void XCoreAsmPrinter::emitFunctionStart(MachineFunction &MF) { O << *CurrentFnSym << ":\n"; } -/// Emit the directives on the end of functions -void XCoreAsmPrinter::emitFunctionEnd(MachineFunction &MF) { - // Mark the end of the function + +/// EmitFunctionBodyEnd - Targets can override this to emit stuff after +/// the last basic block in the function. +void XCoreAsmPrinter::EmitFunctionBodyEnd() { + // Emit function end directives O << "\t.cc_bottom " << *CurrentFnSym << ".function\n"; } /// runOnMachineFunction - This uses the printMachineInstruction() /// method to print assembly for each instruction. /// -bool XCoreAsmPrinter::runOnMachineFunction(MachineFunction &MF) -{ - this->MF = &MF; - +bool XCoreAsmPrinter::runOnMachineFunction(MachineFunction &MF) { SetupMachineFunction(MF); // Print out constants referenced by the function - EmitConstantPool(MF.getConstantPool()); - - // Print out jump tables referenced by the function - EmitJumpTableInfo(MF.getJumpTableInfo(), MF); + EmitConstantPool(); // Emit the function start directives emitFunctionStart(MF); @@ -249,32 +241,7 @@ bool XCoreAsmPrinter::runOnMachineFunction(MachineFunction &MF) // Emit pre-function debug information. DW->BeginFunction(&MF); - // Print out code for the function. - for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); - I != E; ++I) { - - // Print a label for the basic block. - if (I != MF.begin()) { - EmitBasicBlockStart(I); - } - - for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); - II != E; ++II) { - // Print the assembly for the instruction. - printMachineInstruction(II); - } - - // Each Basic Block is separated by a newline - O << '\n'; - } - - // Emit function end directives - emitFunctionEnd(MF); - - // Emit post-function debug information. - DW->EndFunction(&MF); - - // We didn't modify anything. 
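The XCore printer's hand-rolled block-and-instruction loop is deleted above, and the single EmitFunctionBody() call that replaces it appears just below; targets now supply only the EmitInstruction and EmitFunctionBodyEnd hooks. A toy model of that template-method shape, with invented stand-ins for the machine IR types:

  #include <cstddef>
  #include <iostream>
  #include <vector>

  struct Instr { const char *Text; };            // toy MachineInstr
  struct Block { std::vector<Instr> Instrs; };   // toy MachineBasicBlock

  // The base class owns the iteration; targets override only hooks.
  class Printer {
  public:
    virtual ~Printer() {}
    void EmitFunctionBody(const std::vector<Block> &Blocks) {
      for (std::size_t B = 0; B != Blocks.size(); ++B)
        for (std::size_t I = 0; I != Blocks[B].Instrs.size(); ++I)
          EmitInstruction(Blocks[B].Instrs[I]);
      EmitFunctionBodyEnd();
    }
    virtual void EmitInstruction(const Instr &I) = 0;
    virtual void EmitFunctionBodyEnd() {}
  };

  // An XCore-flavored target supplies just the two hooks.
  class XCoreLikePrinter : public Printer {
    virtual void EmitInstruction(const Instr &I) {
      std::cout << '\t' << I.Text << '\n';
    }
    virtual void EmitFunctionBodyEnd() {
      std::cout << "\t.cc_bottom fn.function\n";
    }
  };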
+ EmitFunctionBody(); return false; } @@ -300,7 +267,7 @@ void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum) { O << MO.getImm(); break; case MachineOperand::MO_MachineBasicBlock: - O << *GetMBBSymbol(MO.getMBB()->getNumber()); + O << *MO.getMBB()->getSymbol(OutContext); break; case MachineOperand::MO_GlobalAddress: O << *GetGlobalValueSymbol(MO.getGlobal()); @@ -333,24 +300,17 @@ bool XCoreAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, return false; } -void XCoreAsmPrinter::printMachineInstruction(const MachineInstr *MI) { - ++EmittedInsts; - - processDebugLoc(MI, true); - +void XCoreAsmPrinter::EmitInstruction(const MachineInstr *MI) { // Check for mov mnemonic unsigned src, dst, srcSR, dstSR; if (TM.getInstrInfo()->isMoveInstr(*MI, src, dst, srcSR, dstSR)) { O << "\tmov " << getRegisterName(dst) << ", "; - O << getRegisterName(src) << '\n'; + O << getRegisterName(src); + OutStreamer.AddBlankLine(); return; } printInstruction(MI); - if (VerboseAsm) - EmitComments(*MI); - O << '\n'; - - processDebugLoc(MI, false); + OutStreamer.AddBlankLine(); } // Force static initialization. diff --git a/lib/Target/XCore/Makefile b/lib/Target/XCore/Makefile index 3bb127f..1b70974 100644 --- a/lib/Target/XCore/Makefile +++ b/lib/Target/XCore/Makefile @@ -10,7 +10,6 @@ LEVEL = ../../.. LIBRARYNAME = LLVMXCoreCodeGen TARGET = XCore -CXXFLAGS = -fno-rtti # Make sure that tblgen is run, first thing. BUILT_SOURCES = XCoreGenRegisterInfo.h.inc XCoreGenRegisterNames.inc \ diff --git a/lib/Target/XCore/TargetInfo/Makefile b/lib/Target/XCore/TargetInfo/Makefile index 83bba13..f8a4095 100644 --- a/lib/Target/XCore/TargetInfo/Makefile +++ b/lib/Target/XCore/TargetInfo/Makefile @@ -9,7 +9,6 @@ LEVEL = ../../../.. LIBRARYNAME = LLVMXCoreInfo -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 6849e0b..57fd43b 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -390,7 +390,12 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) if (Offset % 4 == 0) { // We've managed to infer better alignment information than the load // already has. Use an aligned load. - return DAG.getLoad(getPointerTy(), dl, Chain, BasePtr, NULL, 4); + // + // FIXME: No new alignment information is actually passed here. + // Should the offset really be 4? 
+ // + return DAG.getLoad(getPointerTy(), dl, Chain, BasePtr, NULL, 4, + false, false, 0); } // Lower to // ldw low, base[offset >> 2] @@ -407,9 +412,9 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Base, HighOffset); SDValue Low = DAG.getLoad(getPointerTy(), dl, Chain, - LowAddr, NULL, 4); + LowAddr, NULL, 4, false, false, 0); SDValue High = DAG.getLoad(getPointerTy(), dl, Chain, - HighAddr, NULL, 4); + HighAddr, NULL, 4, false, false, 0); SDValue LowShifted = DAG.getNode(ISD::SRL, dl, MVT::i32, Low, LowShift); SDValue HighShifted = DAG.getNode(ISD::SHL, dl, MVT::i32, High, HighShift); SDValue Result = DAG.getNode(ISD::OR, dl, MVT::i32, LowShifted, HighShifted); @@ -423,12 +428,13 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) int SVOffset = LD->getSrcValueOffset(); SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain, BasePtr, LD->getSrcValue(), SVOffset, MVT::i16, - LD->isVolatile(), 2); + LD->isVolatile(), LD->isNonTemporal(), 2); SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr, DAG.getConstant(2, MVT::i32)); SDValue High = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::i32, Chain, HighAddr, LD->getSrcValue(), SVOffset + 2, - MVT::i16, LD->isVolatile(), 2); + MVT::i16, LD->isVolatile(), + LD->isNonTemporal(), 2); SDValue HighShifted = DAG.getNode(ISD::SHL, dl, MVT::i32, High, DAG.getConstant(16, MVT::i32)); SDValue Result = DAG.getNode(ISD::OR, dl, MVT::i32, Low, HighShifted); @@ -487,12 +493,14 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) DAG.getConstant(16, MVT::i32)); SDValue StoreLow = DAG.getTruncStore(Chain, dl, Low, BasePtr, ST->getSrcValue(), SVOffset, MVT::i16, - ST->isVolatile(), 2); + ST->isVolatile(), ST->isNonTemporal(), + 2); SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr, DAG.getConstant(2, MVT::i32)); SDValue StoreHigh = DAG.getTruncStore(Chain, dl, High, HighAddr, ST->getSrcValue(), SVOffset + 2, - MVT::i16, ST->isVolatile(), 2); + MVT::i16, ST->isVolatile(), + ST->isNonTemporal(), 2); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, StoreLow, StoreHigh); } @@ -561,15 +569,16 @@ LowerVAARG(SDValue Op, SelectionDAG &DAG) const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); EVT VT = Node->getValueType(0); SDValue VAList = DAG.getLoad(getPointerTy(), dl, Node->getOperand(0), - Node->getOperand(1), V, 0); + Node->getOperand(1), V, 0, false, false, 0); // Increment the pointer, VAList, to the next vararg SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, getPointerTy(), VAList, DAG.getConstant(VT.getSizeInBits(), getPointerTy())); // Store the incremented VAList to the legalized pointer - Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Node->getOperand(1), V, 0); + Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Node->getOperand(1), V, 0, + false, false, 0); // Load the actual argument out of the pointer VAList - return DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0); + return DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0, false, false, 0); } SDValue XCoreTargetLowering:: @@ -582,7 +591,8 @@ LowerVASTART(SDValue Op, SelectionDAG &DAG) XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>(); SDValue Addr = DAG.getFrameIndex(XFI->getVarArgsFrameIndex(), MVT::i32); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); - return DAG.getStore(Op.getOperand(0), dl, Addr, Op.getOperand(1), SV, 0); + return DAG.getStore(Op.getOperand(0), dl, Addr, Op.getOperand(1), SV, 0, + false, false, 0); } SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { @@ 
-611,11 +621,13 @@ SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { SDValue XCoreTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, + bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) { + // XCore target does not yet support tail call optimization. + isTailCall = false; // For now, only CallingConv::C implemented switch (CallConv) @@ -875,7 +887,8 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, // Create the SelectionDAG nodes corresponding to a load //from this parameter SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); - InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, NULL, 0)); + InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, NULL, 0, + false, false, 0)); } } @@ -906,7 +919,8 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, RegInfo.addLiveIn(ArgRegs[i], VReg); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); // Move argument from virt reg -> stack - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0, + false, false, 0); MemOps.push_back(Store); } if (!MemOps.empty()) diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h index f86be5e..5095f36 100644 --- a/lib/Target/XCore/XCoreISelLowering.h +++ b/lib/Target/XCore/XCoreISelLowering.h @@ -28,7 +28,7 @@ namespace llvm { namespace XCoreISD { enum NodeType { // Start the numbering where the builtin ops and target ops leave off. - FIRST_NUMBER = ISD::BUILTIN_OP_END+XCore::INSTRUCTION_LIST_END, + FIRST_NUMBER = ISD::BUILTIN_OP_END, // Branch and link (call) BL, @@ -149,7 +149,7 @@ namespace llvm { virtual SDValue LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, + bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index d4ae49e..10dc18c 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -686,7 +686,7 @@ def LDAP_lu10_ba : _FLU10<(outs), [(set R11, (pcrelwrapper tblockaddress:$addr))]>; let isCall=1, -// All calls clobber the the link register and the non-callee-saved registers: +// All calls clobber the link register and the non-callee-saved registers: Defs = [R0, R1, R2, R3, R11, LR] in { def BL_u10 : _FU10< (outs), @@ -779,7 +779,7 @@ def ECALLF_1r : _F1R<(outs), (ins GRRegs:$src), []>; let isCall=1, -// All calls clobber the the link register and the non-callee-saved registers: +// All calls clobber the link register and the non-callee-saved registers: Defs = [R0, R1, R2, R3, R11, LR] in { def BLA_1r : _F1R<(outs), (ins GRRegs:$addr, variable_ops), "bla $addr", diff --git a/lib/Target/XCore/XCoreMCAsmInfo.cpp b/lib/Target/XCore/XCoreMCAsmInfo.cpp index dffdda9..bf78575 100644 --- a/lib/Target/XCore/XCoreMCAsmInfo.cpp +++ b/lib/Target/XCore/XCoreMCAsmInfo.cpp @@ -22,7 +22,6 @@ XCoreMCAsmInfo::XCoreMCAsmInfo(const Target &T, const StringRef &TT) { AscizDirective = ".asciiz"; WeakDefDirective = "\t.weak\t"; WeakRefDirective = "\t.weak\t"; - SetDirective = "\t.set\t"; // Debug HasLEB128 = true; diff --git a/lib/Target/XCore/XCoreTargetObjectFile.h b/lib/Target/XCore/XCoreTargetObjectFile.h index 
7efb990..7424c78 100644 --- a/lib/Target/XCore/XCoreTargetObjectFile.h +++ b/lib/Target/XCore/XCoreTargetObjectFile.h @@ -10,13 +10,12 @@ #ifndef LLVM_TARGET_XCORE_TARGETOBJECTFILE_H #define LLVM_TARGET_XCORE_TARGETOBJECTFILE_H -#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" namespace llvm { class XCoreTargetObjectFile : public TargetLoweringObjectFileELF { public: - void Initialize(MCContext &Ctx, const TargetMachine &TM); // TODO: Classify globals as xcore wishes. diff --git a/lib/Transforms/Hello/Makefile b/lib/Transforms/Hello/Makefile index 46f8098..c5e75d4 100644 --- a/lib/Transforms/Hello/Makefile +++ b/lib/Transforms/Hello/Makefile @@ -11,7 +11,6 @@ LEVEL = ../../.. LIBRARYNAME = LLVMHello LOADABLE_MODULE = 1 USEDLIBS = -CXXFLAGS = -fno-rtti include $(LEVEL)/Makefile.common diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index d8190a4..325d353 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -247,7 +247,7 @@ static bool PrefixIn(const ArgPromotion::IndicesVector &Indices, return Low != Set.end() && IsPrefix(*Low, Indices); } -/// Mark the given indices (ToMark) as safe in the the given set of indices +/// Mark the given indices (ToMark) as safe in the given set of indices /// (Safe). Marking safe usually means adding ToMark to Safe. However, if there /// is already a prefix of Indices in Safe, Indices are implicitely marked safe /// already. Furthermore, any indices that Indices is itself a prefix of, are diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp index 4972687..3c05f88 100644 --- a/lib/Transforms/IPO/ConstantMerge.cpp +++ b/lib/Transforms/IPO/ConstantMerge.cpp @@ -19,10 +19,11 @@ #define DEBUG_TYPE "constmerge" #include "llvm/Transforms/IPO.h" +#include "llvm/DerivedTypes.h" #include "llvm/Module.h" #include "llvm/Pass.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Statistic.h" -#include <map> using namespace llvm; STATISTIC(NumMerged, "Number of global constants merged"); @@ -48,10 +49,10 @@ ModulePass *llvm::createConstantMergePass() { return new ConstantMerge(); } bool ConstantMerge::runOnModule(Module &M) { // Map unique constant/section pairs to globals. We don't want to merge // globals in different sections. - std::map<std::pair<Constant*, std::string>, GlobalVariable*> CMap; + DenseMap<Constant*, GlobalVariable*> CMap; // Replacements - This vector contains a list of replacements to perform. - std::vector<std::pair<GlobalVariable*, GlobalVariable*> > Replacements; + SmallVector<std::pair<GlobalVariable*, GlobalVariable*>, 32> Replacements; bool MadeChange = false; @@ -76,19 +77,21 @@ bool ConstantMerge::runOnModule(Module &M) { continue; } - // Only process constants with initializers. - if (GV->isConstant() && GV->hasDefinitiveInitializer()) { - Constant *Init = GV->getInitializer(); - - // Check to see if the initializer is already known. - GlobalVariable *&Slot = CMap[std::make_pair(Init, GV->getSection())]; - - if (Slot == 0) { // Nope, add it to the map. - Slot = GV; - } else if (GV->hasLocalLinkage()) { // Yup, this is a duplicate! - // Make all uses of the duplicate constant use the canonical version. - Replacements.push_back(std::make_pair(GV, Slot)); - } + // Only process constants with initializers in the default address space.
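The rewritten ConstantMerge loop, which the hunk continues with just below, filters out non-mergeable globals up front and then keys the duplicate map on the initializer alone, the section check having become an eligibility test rather than part of the key. A standalone sketch of that shape (a toy Global in place of llvm::GlobalVariable, std::map in place of DenseMap):

  #include <cstddef>
  #include <map>
  #include <string>
  #include <vector>

  // Toy global: only the fields the eligibility test and the map need.
  struct Global {
    int Initializer;        // stand-in for the Constant* initializer
    bool IsConstant, HasDefinitiveInit, HasLocalLinkage;
    unsigned AddressSpace;
    std::string Section;
    Global *ReplacedBy;     // canonical copy, once merged away
  };

  // Ineligible globals are skipped up front; the map is then keyed on
  // the initializer alone, since sectioned globals never get this far.
  void mergeDuplicates(std::vector<Global> &Globals) {
    std::map<int, Global *> CMap;
    for (std::size_t i = 0; i != Globals.size(); ++i) {
      Global &GV = Globals[i];
      if (!GV.IsConstant || !GV.HasDefinitiveInit ||
          GV.AddressSpace != 0 || !GV.Section.empty())
        continue;
      Global *&Slot = CMap[GV.Initializer];
      if (!Slot)
        Slot = &GV;               // first copy becomes the canonical one
      else if (GV.HasLocalLinkage)
        GV.ReplacedBy = Slot;     // duplicate: redirect its uses later
    }
  }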
+ if (!GV->isConstant() ||!GV->hasDefinitiveInitializer() || + GV->getType()->getAddressSpace() != 0 || !GV->getSection().empty()) + continue; + + Constant *Init = GV->getInitializer(); + + // Check to see if the initializer is already known. + GlobalVariable *&Slot = CMap[Init]; + + if (Slot == 0) { // Nope, add it to the map. + Slot = GV; + } else if (GV->hasLocalLinkage()) { // Yup, this is a duplicate! + // Make all uses of the duplicate constant use the canonical version. + Replacements.push_back(std::make_pair(GV, Slot)); } } @@ -100,11 +103,11 @@ bool ConstantMerge::runOnModule(Module &M) { // now. This avoid invalidating the pointers in CMap, which are unneeded // now. for (unsigned i = 0, e = Replacements.size(); i != e; ++i) { - // Eliminate any uses of the dead global... + // Eliminate any uses of the dead global. Replacements[i].first->replaceAllUsesWith(Replacements[i].second); - // Delete the global value from the module... - M.getGlobalList().erase(Replacements[i].first); + // Delete the global value from the module. + Replacements[i].first->eraseFromParent(); } NumMerged += Replacements.size(); diff --git a/lib/Transforms/IPO/DeadTypeElimination.cpp b/lib/Transforms/IPO/DeadTypeElimination.cpp index 025d77e..662fbb5 100644 --- a/lib/Transforms/IPO/DeadTypeElimination.cpp +++ b/lib/Transforms/IPO/DeadTypeElimination.cpp @@ -57,13 +57,13 @@ ModulePass *llvm::createDeadTypeEliminationPass() { // static inline bool ShouldNukeSymtabEntry(const Type *Ty){ // Nuke all names for primitive types! - if (Ty->isPrimitiveType() || Ty->isInteger()) + if (Ty->isPrimitiveType() || Ty->isIntegerTy()) return true; // Nuke all pointers to primitive types as well... if (const PointerType *PT = dyn_cast<PointerType>(Ty)) if (PT->getElementType()->isPrimitiveType() || - PT->getElementType()->isInteger()) + PT->getElementType()->isIntegerTy()) return true; return false; diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index ee260e9..df060eb 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -638,8 +638,8 @@ static bool AllUsesOfValueWillTrapIfNull(Value *V, } else if (PHINode *PN = dyn_cast<PHINode>(*UI)) { // If we've already seen this phi node, ignore it, it has already been // checked. - if (PHIs.insert(PN)) - return AllUsesOfValueWillTrapIfNull(PN, PHIs); + if (PHIs.insert(PN) && !AllUsesOfValueWillTrapIfNull(PN, PHIs)) + return false; } else if (isa<ICmpInst>(*UI) && isa<ConstantPointerNull>(UI->getOperand(1))) { // Ignore setcc X, null @@ -1590,7 +1590,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { // simplification. In these cases, we typically end up with "cond ? v1 : v2" // where v1 and v2 both require constant pool loads, a big loss. 
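The GlobalOpt hunk above fixes a subtle short-circuit in AllUsesOfValueWillTrapIfNull: returning the recursive result directly meant a passing PHI ended the scan before the remaining users were checked; only a failing recursion may return early. A minimal standalone rendering of the corrected traversal (toy use-graph types, not LLVM's):

  #include <cstddef>
  #include <set>
  #include <vector>

  struct User;
  struct Value { std::vector<User *> Users; };
  struct User : Value { bool TrapsIfNull; bool IsPhi; };

  // Corrected traversal: a passing PHI no longer ends the scan early;
  // only a *failing* recursive check may return before the loop is done.
  bool allUsesTrapIfNull(const Value *V, std::set<const User *> &Phis) {
    for (std::size_t i = 0; i != V->Users.size(); ++i) {
      const User *U = V->Users[i];
      if (U->IsPhi) {
        if (Phis.insert(U).second && !allUsesTrapIfNull(U, Phis))
          return false;   // was: return allUsesTrapIfNull(U, Phis);
      } else if (!U->TrapsIfNull) {
        return false;
      }
    }
    return true;
  }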
if (GVElType == Type::getInt1Ty(GV->getContext()) || - GVElType->isFloatingPoint() || + GVElType->isFloatingPointTy() || isa<PointerType>(GVElType) || isa<VectorType>(GVElType)) return false; @@ -1925,7 +1925,7 @@ GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) { if (!ATy) return 0; const StructType *STy = dyn_cast<StructType>(ATy->getElementType()); if (!STy || STy->getNumElements() != 2 || - !STy->getElementType(0)->isInteger(32)) return 0; + !STy->getElementType(0)->isIntegerTy(32)) return 0; const PointerType *PFTy = dyn_cast<PointerType>(STy->getElementType(1)); if (!PFTy) return 0; const FunctionType *FTy = dyn_cast<FunctionType>(PFTy->getElementType()); diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index 0990278..752a97c 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -38,8 +38,15 @@ STATISTIC(NumDeleted, "Number of functions deleted because all callers found"); STATISTIC(NumMergedAllocas, "Number of allocas merged together"); static cl::opt<int> -InlineLimit("inline-threshold", cl::Hidden, cl::init(200), cl::ZeroOrMore, - cl::desc("Control the amount of inlining to perform (default = 200)")); +InlineLimit("inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore, + cl::desc("Control the amount of inlining to perform (default = 225)")); + +static cl::opt<int> +HintThreshold("inlinehint-threshold", cl::Hidden, cl::init(325), + cl::desc("Threshold for inlining functions with inline hint")); + +// Threshold to use when optsize is specified (and there is no -inline-limit). +const int OptSizeThreshold = 75; Inliner::Inliner(void *ID) : CallGraphSCCPass(ID), InlineThreshold(InlineLimit) {} @@ -172,13 +179,23 @@ static bool InlineCallIfPossible(CallSite CS, CallGraph &CG, return true; } -unsigned Inliner::getInlineThreshold(Function* Caller) const { +unsigned Inliner::getInlineThreshold(CallSite CS) const { + int thres = InlineThreshold; + + // Listen to optsize when -inline-limit is not given. + Function *Caller = CS.getCaller(); if (Caller && !Caller->isDeclaration() && Caller->hasFnAttr(Attribute::OptimizeForSize) && InlineLimit.getNumOccurrences() == 0) - return 50; - else - return InlineThreshold; + thres = OptSizeThreshold; + + // Listen to inlinehint when it would increase the threshold. + Function *Callee = CS.getCalledFunction(); + if (HintThreshold > thres && Callee && !Callee->isDeclaration() && + Callee->hasFnAttr(Attribute::InlineHint)) + thres = HintThreshold; + + return thres; } /// shouldInline - Return true if the inliner should attempt to inline @@ -200,7 +217,7 @@ bool Inliner::shouldInline(CallSite CS) { int Cost = IC.getValue(); Function *Caller = CS.getCaller(); - int CurrentThreshold = getInlineThreshold(Caller); + int CurrentThreshold = getInlineThreshold(CS); float FudgeFactor = getInlineFudgeFactor(CS); if (Cost >= (int)(CurrentThreshold * FudgeFactor)) { DEBUG(dbgs() << " NOT Inlining: cost=" << Cost @@ -236,8 +253,7 @@ bool Inliner::shouldInline(CallSite CS) { outerCallsFound = true; int Cost2 = IC2.getValue(); - Function *Caller2 = CS2.getCaller(); - int CurrentThreshold2 = getInlineThreshold(Caller2); + int CurrentThreshold2 = getInlineThreshold(CS2); float FudgeFactor2 = getInlineFudgeFactor(CS2); if (Cost2 >= (int)(CurrentThreshold2 * FudgeFactor2)) diff --git a/lib/Transforms/IPO/Makefile b/lib/Transforms/IPO/Makefile index fd018c4..5c42374 100644 --- a/lib/Transforms/IPO/Makefile +++ b/lib/Transforms/IPO/Makefile @@ -10,7 +10,6 @@ LEVEL = ../../.. 
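The Inliner change earlier in this hunk replaces the hard-coded optsize value of 50 with named thresholds and routes the decision through the call site: optsize lowers the budget to 75 only when -inline-threshold was not given on the command line, and inlinehint raises it to 325 only when that is actually an increase. A standalone mirror of that selection (the struct fields stand in for the attribute queries):

  // Mirrors the new Inliner::getInlineThreshold, with the diff's
  // defaults folded in as plain parameters for illustration.
  struct Fn { bool IsDeclaration, OptForSize, InlineHint; };

  int inlineThreshold(const Fn *Caller, const Fn *Callee,
                      bool UserSetLimit,          // -inline-threshold given?
                      int Limit = 225, int HintThres = 325,
                      int OptSizeThres = 75) {
    int Thres = Limit;
    // Listen to optsize only when the user did not pin the limit.
    if (Caller && !Caller->IsDeclaration && Caller->OptForSize &&
        !UserSetLimit)
      Thres = OptSizeThres;
    // inlinehint may only raise the threshold, never lower it.
    if (HintThres > Thres && Callee && !Callee->IsDeclaration &&
        Callee->InlineHint)
      Thres = HintThres;
    return Thres;
  }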
LIBRARYNAME = LLVMipo BUILD_ARCHIVE = 1 -CXXFLAGS = -fno-rtti include $(LEVEL)/Makefile.common diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index fa8845b..b07e22c 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -467,7 +467,6 @@ static LinkageCategory categorize(const Function *F) { case GlobalValue::AppendingLinkage: case GlobalValue::DLLImportLinkage: case GlobalValue::DLLExportLinkage: - case GlobalValue::GhostLinkage: case GlobalValue::CommonLinkage: return ExternalStrong; } diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index f40902f..f8ec722 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -117,7 +117,7 @@ Function* PartialInliner::unswitchFunction(Function* F) { DominatorTree DT; DT.runOnFunction(*duplicateFunction); - // Extract the body of the the if. + // Extract the body of the if. Function* extractedFunction = ExtractCodeRegion(DT, toExtract); // Inline the top-level if test into all callers. diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp index 0e0d83a..310e4a2 100644 --- a/lib/Transforms/IPO/StripSymbols.cpp +++ b/lib/Transforms/IPO/StripSymbols.cpp @@ -214,6 +214,15 @@ static bool StripDebugInfo(Module &M) { Changed = true; } + if (Function *DbgVal = M.getFunction("llvm.dbg.value")) { + while (!DbgVal->use_empty()) { + CallInst *CI = cast<CallInst>(DbgVal->use_back()); + CI->eraseFromParent(); + } + DbgVal->eraseFromParent(); + Changed = true; + } + NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv"); if (NMD) { Changed = true; diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h index 5367900..09accb6 100644 --- a/lib/Transforms/InstCombine/InstCombine.h +++ b/lib/Transforms/InstCombine/InstCombine.h @@ -199,11 +199,12 @@ private: SmallVectorImpl<Value*> &NewIndices); Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI); - /// ValueRequiresCast - Return true if the cast from "V to Ty" actually - /// results in any code being generated. It does not require codegen if V is - /// simple enough or if the cast can be folded into other casts. - bool ValueRequiresCast(Instruction::CastOps opcode,const Value *V, - const Type *Ty); + /// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually + /// results in any code being generated and is interesting to optimize out. If + /// the cast can be eliminated by some other simple transformation, we prefer + /// to do the simplification first. + bool ShouldOptimizeCast(Instruction::CastOps opcode,const Value *V, + const Type *Ty); Instruction *visitCallSite(CallSite CS); bool transformConstExprCastCall(CallSite CS); diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 4891ff0..2da17f1 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -35,7 +35,7 @@ static Constant *SubOne(ConstantInt *C) { // Otherwise, return null. 
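The StripSymbols addition above uses the standard drain-the-use-list idiom: each eraseFromParent() on a call removes one use of the llvm.dbg.value declaration, so looping until use_empty() terminates, after which the declaration itself can be erased. A toy rendering of the same two-phase teardown (invented types, not LLVM's):

  #include <vector>

  struct Call { bool Erased; };
  struct Func { std::vector<Call *> Uses; bool Erased; };

  // Two-phase teardown: erasing a call removes one use of the callee,
  // so the loop terminates; once no uses remain, the declaration goes.
  void eraseDeclAndUses(Func &F) {
    while (!F.Uses.empty()) {
      F.Uses.back()->Erased = true;  // models CI->eraseFromParent()
      F.Uses.pop_back();             // the erase drops the use edge
    }
    F.Erased = true;                 // models DbgVal->eraseFromParent()
  }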
// static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) { - if (!V->hasOneUse() || !V->getType()->isInteger()) + if (!V->hasOneUse() || !V->getType()->isIntegerTy()) return 0; Instruction *I = dyn_cast<Instruction>(V); @@ -121,50 +121,34 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) { uint32_t TySizeBits = I.getType()->getScalarSizeInBits(); const APInt& RHSVal = cast<ConstantInt>(RHSC)->getValue(); - - uint32_t Size = TySizeBits / 2; - APInt C0080Val(APInt(TySizeBits, 1ULL).shl(Size - 1)); - APInt CFF80Val(-C0080Val); - do { - if (TySizeBits > Size) { - // If we have ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), it's a sext. - // If we have ADD(XOR(AND(X, 0xFF), 0xF..F80), 0x80), it's a sext. - if ((RHSVal == CFF80Val && XorRHS->getValue() == C0080Val) || - (RHSVal == C0080Val && XorRHS->getValue() == CFF80Val)) { - // This is a sign extend if the top bits are known zero. - if (!MaskedValueIsZero(XorLHS, - APInt::getHighBitsSet(TySizeBits, TySizeBits - Size))) - Size = 0; // Not a sign ext, but can't be any others either. - break; - } - } - Size >>= 1; - C0080Val = APIntOps::lshr(C0080Val, Size); - CFF80Val = APIntOps::ashr(CFF80Val, Size); - } while (Size >= 1); - - // FIXME: This shouldn't be necessary. When the backends can handle types - // with funny bit widths then this switch statement should be removed. It - // is just here to get the size of the "middle" type back up to something - // that the back ends can handle. - const Type *MiddleType = 0; - switch (Size) { - default: break; - case 32: - case 16: - case 8: MiddleType = IntegerType::get(I.getContext(), Size); break; + unsigned ExtendAmt = 0; + // If we have ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), it's a sext. + // If we have ADD(XOR(AND(X, 0xFF), 0xF..F80), 0x80), it's a sext. 
+ if (XorRHS->getValue() == -RHSVal) { + if (RHSVal.isPowerOf2()) + ExtendAmt = TySizeBits - RHSVal.logBase2() - 1; + else if (XorRHS->getValue().isPowerOf2()) + ExtendAmt = TySizeBits - XorRHS->getValue().logBase2() - 1; + } + + if (ExtendAmt) { + APInt Mask = APInt::getHighBitsSet(TySizeBits, ExtendAmt); + if (!MaskedValueIsZero(XorLHS, Mask)) + ExtendAmt = 0; } - if (MiddleType) { - Value *NewTrunc = Builder->CreateTrunc(XorLHS, MiddleType, "sext"); - return new SExtInst(NewTrunc, I.getType(), I.getName()); + + if (ExtendAmt) { + Constant *ShAmt = ConstantInt::get(I.getType(), ExtendAmt); + Value *NewShl = Builder->CreateShl(XorLHS, ShAmt, "sext"); + return BinaryOperator::CreateAShr(NewShl, ShAmt); } } } - if (I.getType()->isInteger(1)) + if (I.getType()->isIntegerTy(1)) return BinaryOperator::CreateXor(LHS, RHS); - if (I.getType()->isInteger()) { + if (I.getType()->isIntegerTy()) { // X + X --> X << 1 if (LHS == RHS) return BinaryOperator::CreateShl(LHS, ConstantInt::get(I.getType(), 1)); @@ -184,7 +168,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { // -A + B --> B - A // -A + -B --> -(A + B) if (Value *LHSV = dyn_castNegVal(LHS)) { - if (LHS->getType()->isIntOrIntVector()) { + if (LHS->getType()->isIntOrIntVectorTy()) { if (Value *RHSV = dyn_castNegVal(RHS)) { Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum"); return BinaryOperator::CreateNeg(NewAdd); @@ -238,7 +222,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { } // W*X + Y*Z --> W * (X+Z) iff W == Y - if (I.getType()->isIntOrIntVector()) { + if (I.getType()->isIntOrIntVectorTy()) { Value *W, *X, *Y, *Z; if (match(LHS, m_Mul(m_Value(W), m_Value(X))) && match(RHS, m_Mul(m_Value(Y), m_Value(Z)))) { @@ -576,7 +560,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { return ReplaceInstUsesWith(I, Op0); // undef - X -> undef if (isa<UndefValue>(Op1)) return ReplaceInstUsesWith(I, Op1); // X - undef -> undef - if (I.getType()->isInteger(1)) + if (I.getType()->isIntegerTy(1)) return BinaryOperator::CreateXor(Op0, Op1); if (ConstantInt *C = dyn_cast<ConstantInt>(Op0)) { @@ -676,6 +660,13 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { return BinaryOperator::CreateSDiv(Op1I->getOperand(0), ConstantExpr::getNeg(DivRHS)); + // 0 - (C << X) -> (-C << X) + if (Op1I->getOpcode() == Instruction::Shl) + if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0)) + if (CSI->isZero()) + if (Value *ShlLHSNeg = dyn_castNegVal(Op1I->getOperand(0))) + return BinaryOperator::CreateShl(ShlLHSNeg, Op1I->getOperand(1)); + // X - X*C --> X * (1-C) ConstantInt *C2 = 0; if (dyn_castFoldableMul(Op1I, C2) == Op0) { diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index fa7bb12..5e47953 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -546,7 +546,7 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, std::swap(LHSCC, RHSCC); } - // At this point, we know we have have two icmp instructions + // At this point, we know we have two icmp instructions // comparing a value against two constants and and'ing the result // together. Because of the above check, we know that we only have // icmp eq, icmp ne, icmp [su]lt, and icmp [SU]gt here. 
We also know @@ -932,24 +932,49 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0)) if (Instruction *Res = FoldAndOfICmps(I, LHS, RHS)) return Res; - + + // If and'ing two fcmp, try combine them into one. + if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) + if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) + if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS)) + return Res; + + // fold (and (cast A), (cast B)) -> (cast (and A, B)) if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) - if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) - if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind ? - const Type *SrcTy = Op0C->getOperand(0)->getType(); - if (SrcTy == Op1C->getOperand(0)->getType() && - SrcTy->isIntOrIntVector() && - // Only do this if the casts both really cause code to be generated. - ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), - I.getType()) && - ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), - I.getType())) { - Value *NewOp = Builder->CreateAnd(Op0C->getOperand(0), - Op1C->getOperand(0), I.getName()); + if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) { + const Type *SrcTy = Op0C->getOperand(0)->getType(); + if (Op0C->getOpcode() == Op1C->getOpcode() && // same cast kind ? + SrcTy == Op1C->getOperand(0)->getType() && + SrcTy->isIntOrIntVectorTy()) { + Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0); + + // Only do this if the casts both really cause code to be generated. + if (ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) && + ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) { + Value *NewOp = Builder->CreateAnd(Op0COp, Op1COp, I.getName()); return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); } + + // If this is and(cast(icmp), cast(icmp)), try to fold this even if the + // cast is otherwise not optimizable. This happens for vector sexts. + if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp)) + if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp)) + if (Instruction *Res = FoldAndOfICmps(I, LHS, RHS)) { + InsertNewInstBefore(Res, I); + return CastInst::Create(Op0C->getOpcode(), Res, I.getType()); + } + + // If this is and(cast(fcmp), cast(fcmp)), try to fold this even if the + // cast is otherwise not optimizable. This happens for vector sexts. + if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp)) + if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp)) + if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS)) { + InsertNewInstBefore(Res, I); + return CastInst::Create(Op0C->getOpcode(), Res, I.getType()); + } } + } // (X >> Z) & (Y >> Z) -> (X&Y) >> Z for all shifts. if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) { @@ -965,13 +990,6 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { } } - // If and'ing two fcmp, try combine them into one. - if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) { - if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) - if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS)) - return Res; - } - return Changed ? &I : 0; } @@ -1142,18 +1160,20 @@ static Instruction *MatchSelectFromAndOr(Value *A, Value *B, Value *C, Value *D) { // If A is not a select of -1/0, this cannot match. Value *Cond = 0; - if (!match(A, m_SelectCst<-1, 0>(m_Value(Cond)))) + if (!match(A, m_SExt(m_Value(Cond))) || + !Cond->getType()->isIntegerTy(1)) return 0; // ((cond?-1:0)&C) | (B&(cond?0:-1)) -> cond ? C : B. 
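The updated matcher recognizes masks spelled as sign-extended i1 values rather than as select-of-constants: sext of a bool yields an all-ones or all-zero word, so (C & m) | (B & ~m) with m = sext(cond) is exactly cond ? C : B, which is why the diff also gates the fold off for vector types the code generator does not handle well yet. A compilable check of that identity:

  #include <cassert>
  #include <stdint.h>

  // The identity the matcher relies on: sign-extending an i1 yields an
  // all-ones or all-zero mask, so (C & m) | (B & ~m) is a select on m.
  int32_t selectViaMask(bool Cond, int32_t C, int32_t B) {
    int32_t Mask = Cond ? -1 : 0;      // models sext i1 -> i32
    return (C & Mask) | (B & ~Mask);   // == Cond ? C : B
  }

  int main() {
    assert(selectViaMask(true, 7, 9) == 7);
    assert(selectViaMask(false, 7, 9) == 9);
    return 0;
  }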
- if (match(D, m_SelectCst<0, -1>(m_Specific(Cond)))) + if (match(D, m_Not(m_SExt(m_Specific(Cond))))) return SelectInst::Create(Cond, C, B); - if (match(D, m_Not(m_SelectCst<-1, 0>(m_Specific(Cond))))) + if (match(D, m_SExt(m_Not(m_Specific(Cond))))) return SelectInst::Create(Cond, C, B); + // ((cond?-1:0)&C) | ((cond?0:-1)&D) -> cond ? C : D. - if (match(B, m_SelectCst<0, -1>(m_Specific(Cond)))) + if (match(B, m_Not(m_SExt(m_Specific(Cond))))) return SelectInst::Create(Cond, C, D); - if (match(B, m_Not(m_SelectCst<-1, 0>(m_Specific(Cond))))) + if (match(B, m_SExt(m_Not(m_Specific(Cond))))) return SelectInst::Create(Cond, C, D); return 0; } @@ -1224,7 +1244,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, std::swap(LHSCC, RHSCC); } - // At this point, we know we have have two icmp instructions + // At this point, we know we have two icmp instructions // comparing a value against two constants and or'ing the result // together. Because of the above check, we know that we only have // ICMP_EQ, ICMP_NE, ICMP_LT, and ICMP_GT here. We also know (from the @@ -1595,15 +1615,19 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { } } - // (A & (C0?-1:0)) | (B & ~(C0?-1:0)) -> C0 ? A : B, and commuted variants - if (Instruction *Match = MatchSelectFromAndOr(A, B, C, D)) - return Match; - if (Instruction *Match = MatchSelectFromAndOr(B, A, D, C)) - return Match; - if (Instruction *Match = MatchSelectFromAndOr(C, B, A, D)) - return Match; - if (Instruction *Match = MatchSelectFromAndOr(D, A, B, C)) - return Match; + // (A & (C0?-1:0)) | (B & ~(C0?-1:0)) -> C0 ? A : B, and commuted variants. + // Don't do this for vector select idioms, the code generator doesn't handle + // them well yet. + if (!isa<VectorType>(I.getType())) { + if (Instruction *Match = MatchSelectFromAndOr(A, B, C, D)) + return Match; + if (Instruction *Match = MatchSelectFromAndOr(B, A, D, C)) + return Match; + if (Instruction *Match = MatchSelectFromAndOr(C, B, A, D)) + return Match; + if (Instruction *Match = MatchSelectFromAndOr(D, A, B, C)) + return Match; + } // ((A&~B)|(~A&B)) -> A^B if ((match(C, m_Not(m_Specific(D))) && @@ -1663,37 +1687,51 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (Instruction *Res = FoldOrOfICmps(I, LHS, RHS)) return Res; + // (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y) + if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) + if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) + if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS)) + return Res; + // fold (or (cast A), (cast B)) -> (cast (or A, B)) if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) { if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) if (Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ? - if (!isa<ICmpInst>(Op0C->getOperand(0)) || - !isa<ICmpInst>(Op1C->getOperand(0))) { - const Type *SrcTy = Op0C->getOperand(0)->getType(); - if (SrcTy == Op1C->getOperand(0)->getType() && - SrcTy->isIntOrIntVector() && + const Type *SrcTy = Op0C->getOperand(0)->getType(); + if (SrcTy == Op1C->getOperand(0)->getType() && + SrcTy->isIntOrIntVectorTy()) { + Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0); + + if ((!isa<ICmpInst>(Op0COp) || !isa<ICmpInst>(Op1COp)) && // Only do this if the casts both really cause code to be // generated. 
- ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), - I.getType()) && - ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), - I.getType())) { - Value *NewOp = Builder->CreateOr(Op0C->getOperand(0), - Op1C->getOperand(0), I.getName()); + ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) && + ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) { + Value *NewOp = Builder->CreateOr(Op0COp, Op1COp, I.getName()); return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); } + + // If this is or(cast(icmp), cast(icmp)), try to fold this even if the + // cast is otherwise not optimizable. This happens for vector sexts. + if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp)) + if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp)) + if (Instruction *Res = FoldOrOfICmps(I, LHS, RHS)) { + InsertNewInstBefore(Res, I); + return CastInst::Create(Op0C->getOpcode(), Res, I.getType()); + } + + // If this is or(cast(fcmp), cast(fcmp)), try to fold this even if the + // cast is otherwise not optimizable. This happens for vector sexts. + if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp)) + if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp)) + if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS)) { + InsertNewInstBefore(Res, I); + return CastInst::Create(Op0C->getOpcode(), Res, I.getType()); + } } } } - - // (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y) - if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) { - if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) - if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS)) - return Res; - } - return Changed ? &I : 0; } @@ -1978,12 +2016,12 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind? const Type *SrcTy = Op0C->getOperand(0)->getType(); - if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isInteger() && + if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isIntegerTy() && // Only do this if the casts both really cause code to be generated. - ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), - I.getType()) && - ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), - I.getType())) { + ShouldOptimizeCast(Op0C->getOpcode(), Op0C->getOperand(0), + I.getType()) && + ShouldOptimizeCast(Op1C->getOpcode(), Op1C->getOperand(0), + I.getType())) { Value *NewOp = Builder->CreateXor(Op0C->getOperand(0), Op1C->getOperand(0), I.getName()); return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 47c37c4..d7efdcf 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -199,7 +199,7 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { // Extract the length and alignment and fill if they are constant. ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength()); ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue()); - if (!LenC || !FillC || !FillC->getType()->isInteger(8)) + if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8)) return 0; uint64_t Len = LenC->getZExtValue(); Alignment = MI->getAlignment(); @@ -230,7 +230,6 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { return 0; } - /// visitCallInst - CallInst simplification. This mostly only handles folding /// of intrinsic instructions. For normal calls, it allows visitCallSite to do /// the heavy lifting. 
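
// --- Illustrative aside, not part of the patch: standalone checks of two
// identities the hunks above depend on. First, a bitwise op of two same-kind
// casts equals the cast of the op, e.g. zext(a)|zext(b) == zext(a|b), which is
// what "(or (cast A), (cast B)) -> (cast (or A, B))" exploits. Second, the
// visitOr fold ((A&~B)|(~A&B)) == A^B. Exhaustive over 8-bit values.
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned a = 0; a < 256; ++a)
    for (unsigned b = 0; b < 256; ++b) {
      uint8_t A = static_cast<uint8_t>(a), B = static_cast<uint8_t>(b);
      uint32_t ZA = A, ZB = B;                       // zext i8 -> i32
      assert((ZA | ZB) == static_cast<uint32_t>(uint8_t(A | B)));
      assert((ZA & ZB) == static_cast<uint32_t>(uint8_t(A & B)));
      assert((ZA ^ ZB) == static_cast<uint32_t>(uint8_t(A ^ B)));
      assert(((A & ~B) | (~A & B)) == (A ^ B));      // (A&~B)|(~A&B) -> A^B
    }
  return 0;
}
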
@@ -304,6 +303,60 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { switch (II->getIntrinsicID()) { default: break; + case Intrinsic::objectsize: { + const Type *ReturnTy = CI.getType(); + Value *Op1 = II->getOperand(1); + bool Min = (cast<ConstantInt>(II->getOperand(2))->getZExtValue() == 1); + + // We need target data for just about everything so depend on it. + if (!TD) break; + + // Get to the real allocated thing and offset as fast as possible. + Op1 = Op1->stripPointerCasts(); + + // If we've stripped down to a single global variable that we + // can know the size of then just return that. + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op1)) { + if (GV->hasDefinitiveInitializer()) { + Constant *C = GV->getInitializer(); + size_t globalSize = TD->getTypeAllocSize(C->getType()); + return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, globalSize)); + } else { + Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL); + return ReplaceInstUsesWith(CI, RetVal); + } + } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op1)) { + + // Only handle constant GEPs here. + if (CE->getOpcode() != Instruction::GetElementPtr) break; + GEPOperator *GEP = cast<GEPOperator>(CE); + + // Make sure we're not a constant offset from an external + // global. + Value *Operand = GEP->getPointerOperand(); + Operand = Operand->stripPointerCasts(); + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Operand)) + if (!GV->hasDefinitiveInitializer()) break; + + // Get what we're pointing to and its size. + const PointerType *BaseType = + cast<PointerType>(Operand->getType()); + size_t Size = TD->getTypeAllocSize(BaseType->getElementType()); + + // Get the current byte offset into the thing. Use the original + // operand in case we're looking through a bitcast. + SmallVector<Value*, 8> Ops(CE->op_begin()+1, CE->op_end()); + const PointerType *OffsetType = + cast<PointerType>(GEP->getPointerOperand()->getType()); + size_t Offset = TD->getIndexedOffset(OffsetType, &Ops[0], Ops.size()); + + assert(Size >= Offset); + + Constant *RetVal = ConstantInt::get(ReturnTy, Size-Offset); + return ReplaceInstUsesWith(CI, RetVal); + + } + } case Intrinsic::bswap: // bswap(bswap(x)) -> x if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getOperand(1))) @@ -632,18 +685,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { return EraseInstFromFunction(CI); break; } - case Intrinsic::objectsize: { - ConstantInt *Const = cast<ConstantInt>(II->getOperand(2)); - const Type *Ty = CI.getType(); - - // 0 is maximum number of bytes left, 1 is minimum number of bytes left. - // TODO: actually add these values, the current return values are "don't - // know". - if (Const->getZExtValue() == 0) - return ReplaceInstUsesWith(CI, Constant::getAllOnesValue(Ty)); - else - return ReplaceInstUsesWith(CI, ConstantInt::get(Ty, 0)); - } } return visitCallSite(II); @@ -692,10 +733,14 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { Value *Callee = CS.getCalledValue(); if (Function *CalleeF = dyn_cast<Function>(Callee)) - if (CalleeF->getCallingConv() != CS.getCallingConv()) { + // If the call and callee calling conventions don't match, this call must + // be unreachable, as the call is undefined. + if (CalleeF->getCallingConv() != CS.getCallingConv() && + // Only do this for calls to a function with a body. A prototype may + // not actually end up matching the implementation's calling conv for a + // variety of reasons (e.g. it may be written in assembly). 
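
// --- Illustrative aside, not part of the patch: what the new objectsize
// handling above computes. For a global whose allocated size is known and a
// constant GEP offset into it, the intrinsic folds to Size - Offset. A
// hypothetical host-side analogue (Buffer/Offset are made-up stand-ins):
#include <cassert>
#include <cstddef>

static char Buffer[100];   // stands in for a global with a known initializer

int main() {
  std::size_t Size = sizeof(Buffer);       // cf. TD->getTypeAllocSize(...)
  std::size_t Offset = 25;                 // cf. TD->getIndexedOffset(...)
  assert(Size >= Offset);
  std::size_t Remaining = Size - Offset;   // the folded llvm.objectsize result
  assert(Remaining == 75);
  return 0;
}
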
+ !CalleeF->isDeclaration()) { Instruction *OldCall = CS.getInstruction(); - // If the call and callee calling conventions don't match, this call must - // be unreachable, as the call is undefined. new StoreInst(ConstantInt::getTrue(Callee->getContext()), UndefValue::get(Type::getInt1PtrTy(Callee->getContext())), OldCall); @@ -703,8 +748,13 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { // This allows ValueHandlers and custom metadata to adjust itself. if (!OldCall->getType()->isVoidTy()) OldCall->replaceAllUsesWith(UndefValue::get(OldCall->getType())); - if (isa<CallInst>(OldCall)) // Not worth removing an invoke here. + if (isa<CallInst>(OldCall)) return EraseInstFromFunction(*OldCall); + + // We cannot remove an invoke, because it would change the CFG, just + // change the callee to a null pointer. + cast<InvokeInst>(OldCall)->setOperand(0, + Constant::getNullValue(CalleeF->getType())); return 0; } diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index f25dd35..bb4a0e9 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -23,7 +23,7 @@ using namespace PatternMatch; /// static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale, int &Offset) { - assert(Val->getType()->isInteger(32) && "Unexpected allocation size type!"); + assert(Val->getType()->isIntegerTy(32) && "Unexpected allocation size type!"); if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) { Offset = CI->getZExtValue(); Scale = 0; @@ -255,17 +255,26 @@ isEliminableCastPair( return Instruction::CastOps(Res); } -/// ValueRequiresCast - Return true if the cast from "V to Ty" actually results -/// in any code being generated. It does not require codegen if V is simple -/// enough or if the cast can be folded into other casts. -bool InstCombiner::ValueRequiresCast(Instruction::CastOps opcode,const Value *V, - const Type *Ty) { +/// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually +/// results in any code being generated and is interesting to optimize out. If +/// the cast can be eliminated by some other simple transformation, we prefer +/// to do the simplification first. +bool InstCombiner::ShouldOptimizeCast(Instruction::CastOps opc, const Value *V, + const Type *Ty) { + // Noop casts and casts of constants should be eliminated trivially. if (V->getType() == Ty || isa<Constant>(V)) return false; - // If this is another cast that can be eliminated, it isn't codegen either. + // If this is another cast that can be eliminated, we prefer to have it + // eliminated. if (const CastInst *CI = dyn_cast<CastInst>(V)) - if (isEliminableCastPair(CI, opcode, Ty, TD)) + if (isEliminableCastPair(CI, opc, Ty, TD)) return false; + + // If this is a vector sext from a compare, then we don't want to break the + // idiom where each element of the extended vector is either zero or all ones. 
+ if (opc == Instruction::SExt && isa<CmpInst>(V) && isa<VectorType>(Ty)) + return false; + return true; } @@ -828,7 +837,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { // zext (xor i1 X, true) to i32 --> xor (zext i1 X to i32), 1 Value *X; - if (SrcI && SrcI->hasOneUse() && SrcI->getType()->isInteger(1) && + if (SrcI && SrcI->hasOneUse() && SrcI->getType()->isIntegerTy(1) && match(SrcI, m_Not(m_Value(X))) && (!X->hasOneUse() || !isa<CmpInst>(X))) { Value *New = Builder->CreateZExt(X, CI.getType()); @@ -923,12 +932,6 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { Value *Src = CI.getOperand(0); const Type *SrcTy = Src->getType(), *DestTy = CI.getType(); - // Canonicalize sign-extend from i1 to a select. - if (Src->getType()->isInteger(1)) - return SelectInst::Create(Src, - Constant::getAllOnesValue(CI.getType()), - Constant::getNullValue(CI.getType())); - // Attempt to extend the entire input expression tree to the destination // type. Only do this if the dest type is a simple type, don't convert the // expression tree to something weird like i93 unless the source is also @@ -968,6 +971,30 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { return BinaryOperator::CreateAShr(Res, ShAmt); } + + // (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if signed + // (x >s -1) ? -1 : 0 -> ashr x, 31 -> all ones if not signed + { + ICmpInst::Predicate Pred; Value *CmpLHS; ConstantInt *CmpRHS; + if (match(Src, m_ICmp(Pred, m_Value(CmpLHS), m_ConstantInt(CmpRHS)))) { + // sext (x <s 0) to i32 --> x>>s31 true if signbit set. + // sext (x >s -1) to i32 --> (x>>s31)^-1 true if signbit clear. + if ((Pred == ICmpInst::ICMP_SLT && CmpRHS->isZero()) || + (Pred == ICmpInst::ICMP_SGT && CmpRHS->isAllOnesValue())) { + Value *Sh = ConstantInt::get(CmpLHS->getType(), + CmpLHS->getType()->getScalarSizeInBits()-1); + Value *In = Builder->CreateAShr(CmpLHS, Sh, CmpLHS->getName()+".lobit"); + if (In->getType() != CI.getType()) + In = Builder->CreateIntCast(In, CI.getType(), true/*SExt*/, "tmp"); + + if (Pred == ICmpInst::ICMP_SGT) + In = Builder->CreateNot(In, In->getName()+".not"); + return ReplaceInstUsesWith(CI, In); + } + } + } + + // If the input is a shl/ashr pair of a same constant, then this is a sign // extension from a smaller value. If we could trust arbitrary bitwidth // integers, we could turn this into a truncate to the smaller bit and then @@ -1127,16 +1154,22 @@ Instruction *InstCombiner::visitSIToFP(CastInst &CI) { } Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) { - // If the source integer type is larger than the intptr_t type for - // this target, do a trunc to the intptr_t type, then inttoptr of it. This - // allows the trunc to be exposed to other transforms. Don't do this for - // extending inttoptr's, because we don't know if the target sign or zero - // extends to pointers. - if (TD && CI.getOperand(0)->getType()->getScalarSizeInBits() > - TD->getPointerSizeInBits()) { - Value *P = Builder->CreateTrunc(CI.getOperand(0), - TD->getIntPtrType(CI.getContext()), "tmp"); - return new IntToPtrInst(P, CI.getType()); + // If the source integer type is not the intptr_t type for this target, do a + // trunc or zext to the intptr_t type, then inttoptr of it. This allows the + // cast to be exposed to other transforms. 
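
// --- Illustrative aside, not part of the patch: standalone checks of two
// folds above. "sext (x <s 0)" is the sign mask x >>s 31 and "sext (x >s -1)"
// is its complement; also zext(!c) == zext(c) ^ 1 for the xor-of-i1 fold.
// Assumes arithmetic right shift for signed ints (guaranteed since C++20 and
// true on common compilers).
#include <cassert>
#include <cstdint>

int main() {
  for (int32_t x : {0, 1, -1, 42, -42, INT32_MIN, INT32_MAX}) {
    int32_t SignMask = x >> 31;                  // ashr x, 31
    assert(SignMask == (x < 0 ? -1 : 0));        // sext (x <s 0)
    assert(~SignMask == (x > -1 ? -1 : 0));      // sext (x >s -1)
  }
  for (bool c : {false, true})
    assert(static_cast<uint32_t>(!c) == (static_cast<uint32_t>(c) ^ 1u));
  return 0;
}
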
+ if (TD) { + if (CI.getOperand(0)->getType()->getScalarSizeInBits() > + TD->getPointerSizeInBits()) { + Value *P = Builder->CreateTrunc(CI.getOperand(0), + TD->getIntPtrType(CI.getContext()), "tmp"); + return new IntToPtrInst(P, CI.getType()); + } + if (CI.getOperand(0)->getType()->getScalarSizeInBits() < + TD->getPointerSizeInBits()) { + Value *P = Builder->CreateZExt(CI.getOperand(0), + TD->getIntPtrType(CI.getContext()), "tmp"); + return new IntToPtrInst(P, CI.getType()); + } } if (Instruction *I = commonCastTransforms(CI)) @@ -1198,17 +1231,22 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { } Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) { - // If the destination integer type is smaller than the intptr_t type for - // this target, do a ptrtoint to intptr_t then do a trunc. This allows the - // trunc to be exposed to other transforms. Don't do this for extending - // ptrtoint's, because we don't know if the target sign or zero extends its - // pointers. - if (TD && - CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) { - Value *P = Builder->CreatePtrToInt(CI.getOperand(0), - TD->getIntPtrType(CI.getContext()), - "tmp"); - return new TruncInst(P, CI.getType()); + // If the destination integer type is not the intptr_t type for this target, + // do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast + // to be exposed to other transforms. + if (TD) { + if (CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) { + Value *P = Builder->CreatePtrToInt(CI.getOperand(0), + TD->getIntPtrType(CI.getContext()), + "tmp"); + return new TruncInst(P, CI.getType()); + } + if (CI.getType()->getScalarSizeInBits() > TD->getPointerSizeInBits()) { + Value *P = Builder->CreatePtrToInt(CI.getOperand(0), + TD->getIntPtrType(CI.getContext()), + "tmp"); + return new ZExtInst(P, CI.getType()); + } } return commonPointerCastTransforms(CI); diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index e59406c6..72af80f 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -1589,24 +1589,24 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { bool Changed = false; + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); /// Orders the operands of the compare so that they are listed from most /// complex to least complex. This puts constants before unary operators, /// before binary operators. 
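
// --- Illustrative aside, not part of the patch: the canonicalization above
// says a ptrtoint to a narrow type equals ptrtoint to intptr_t followed by a
// trunc (with zext playing the symmetric role for widening). A hypothetical
// host-side check, assuming a 64-bit pointer width:
#include <cassert>
#include <cstdint>

int main() {
  int Object = 0;
  uintptr_t P = reinterpret_cast<uintptr_t>(&Object);  // ptrtoint to intptr_t
  uint32_t Narrow = static_cast<uint32_t>(P);          // direct ptrtoint to i32
  uint32_t ViaIntPtr = static_cast<uint32_t>(static_cast<uint64_t>(P));
  assert(Narrow == ViaIntPtr);                         // == trunc(intptr_t)
  return 0;
}
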
- if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) { + if (getComplexity(Op0) < getComplexity(Op1)) { I.swapOperands(); + std::swap(Op0, Op1); Changed = true; } - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, TD)) return ReplaceInstUsesWith(I, V); const Type *Ty = Op0->getType(); // icmp's with boolean values can always be turned into bitwise operations - if (Ty == Type::getInt1Ty(I.getContext())) { + if (Ty->isIntegerTy(1)) { switch (I.getPredicate()) { default: llvm_unreachable("Invalid icmp instruction!"); case ICmpInst::ICMP_EQ: { // icmp eq i1 A, B -> ~(A^B) @@ -1650,7 +1650,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { unsigned BitWidth = 0; if (TD) BitWidth = TD->getTypeSizeInBits(Ty->getScalarType()); - else if (Ty->isIntOrIntVector()) + else if (Ty->isIntOrIntVectorTy()) BitWidth = Ty->getScalarSizeInBits(); bool isSignBit = false; diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index ae728dd..e6c59c7 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -87,7 +87,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, const Type *SrcPTy = SrcTy->getElementType(); - if (DestPTy->isInteger() || isa<PointerType>(DestPTy) || + if (DestPTy->isIntegerTy() || isa<PointerType>(DestPTy) || isa<VectorType>(DestPTy)) { // If the source is an array, the code below will not succeed. Check to // see if a trivial 'gep P, 0, 0' will help matters. Only do this for @@ -104,7 +104,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, } if (IC.getTargetData() && - (SrcPTy->isInteger() || isa<PointerType>(SrcPTy) || + (SrcPTy->isIntegerTy() || isa<PointerType>(SrcPTy) || isa<VectorType>(SrcPTy)) && // Do not allow turning this into a load of an integer, which is then // casted to a pointer, this pessimizes pointer analysis a lot. @@ -115,8 +115,9 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, // Okay, we are casting from one integer or pointer type to another of // the same size. Instead of casting the pointer before the load, cast // the result of the loaded value. - Value *NewLoad = + LoadInst *NewLoad = IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName()); + NewLoad->setAlignment(LI.getAlignment()); // Now cast the result of the load. return new BitCastInst(NewLoad, LI.getType()); } @@ -199,12 +200,15 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { // if (SelectInst *SI = dyn_cast<SelectInst>(Op)) { // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2). 
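
// --- Illustrative aside, not part of the patch: the i1 icmp lowering above.
// For booleans, "icmp eq i1 A, B" is ~(A^B), i.e. !(A^B), and "icmp ne" is
// A^B itself. Exhaustive standalone check:
#include <cassert>

int main() {
  for (bool A : {false, true})
    for (bool B : {false, true}) {
      assert((A == B) == !(A ^ B));   // icmp eq i1 A, B -> ~(A^B)
      assert((A != B) == (A ^ B));    // icmp ne i1 A, B -> A^B
    }
  return 0;
}
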
- if (isSafeToLoadUnconditionally(SI->getOperand(1), SI) && - isSafeToLoadUnconditionally(SI->getOperand(2), SI)) { - Value *V1 = Builder->CreateLoad(SI->getOperand(1), - SI->getOperand(1)->getName()+".val"); - Value *V2 = Builder->CreateLoad(SI->getOperand(2), - SI->getOperand(2)->getName()+".val"); + unsigned Align = LI.getAlignment(); + if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align, TD) && + isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align, TD)) { + LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1), + SI->getOperand(1)->getName()+".val"); + LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2), + SI->getOperand(2)->getName()+".val"); + V1->setAlignment(Align); + V2->setAlignment(Align); return SelectInst::Create(SI->getCondition(), V1, V2); } @@ -239,7 +243,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { const Type *SrcPTy = SrcTy->getElementType(); - if (!DestPTy->isInteger() && !isa<PointerType>(DestPTy)) + if (!DestPTy->isIntegerTy() && !isa<PointerType>(DestPTy)) return 0; /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep" @@ -273,7 +277,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace()); } - if (!SrcPTy->isInteger() && !isa<PointerType>(SrcPTy)) + if (!SrcPTy->isIntegerTy() && !isa<PointerType>(SrcPTy)) return 0; // If the pointers point into different address spaces or if they point to @@ -294,7 +298,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { const Type* CastSrcTy = SIOp0->getType(); const Type* CastDstTy = SrcPTy; if (isa<PointerType>(CastDstTy)) { - if (CastSrcTy->isInteger()) + if (CastSrcTy->isIntegerTy()) opcode = Instruction::IntToPtr; } else if (isa<IntegerType>(CastDstTy)) { if (isa<PointerType>(SIOp0->getType())) diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 2e26a75..668c34f 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -157,7 +157,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { } /// i1 mul -> i1 and. - if (I.getType()->isInteger(1)) + if (I.getType()->isIntegerTy(1)) return BinaryOperator::CreateAnd(Op0, Op1); // X*(1 << Y) --> X << Y @@ -314,7 +314,7 @@ Instruction *InstCombiner::commonDivTransforms(BinaryOperator &I) { // undef / X -> 0 for integer. // undef / X -> undef for FP (the undef could be a snan). if (isa<UndefValue>(Op0)) { - if (Op0->getType()->isFPOrFPVector()) + if (Op0->getType()->isFPOrFPVectorTy()) return ReplaceInstUsesWith(I, Op0); return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); } @@ -386,7 +386,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); // It can't be division by zero, hence it must be division by one. - if (I.getType()->isInteger(1)) + if (I.getType()->isIntegerTy(1)) return ReplaceInstUsesWith(I, Op0); if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1)) { @@ -493,7 +493,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { // If the sign bits of both operands are zero (i.e. we can prove they are // unsigned inputs), turn this into a udiv. 
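
// --- Illustrative aside, not part of the patch: two multiply folds above.
// For i1 values, mul is and; and X*(1 << Y) == X << Y for unsigned X (both
// sides wrap modulo 2^32 identically). Y is kept below the bit width to
// avoid undefined shifts in C++:
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned A = 0; A <= 1; ++A)
    for (unsigned B = 0; B <= 1; ++B)
      assert(A * B == (A & B));                      // i1 mul -> i1 and

  for (uint32_t X : {0u, 1u, 0xDEADBEEFu})
    for (unsigned Y = 0; Y < 32; ++Y)
      assert(X * (uint32_t(1) << Y) == (X << Y));    // X*(1 << Y) --> X << Y
  return 0;
}
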
- if (I.getType()->isInteger()) { + if (I.getType()->isIntegerTy()) { APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())); if (MaskedValueIsZero(Op0, Mask)) { if (MaskedValueIsZero(Op1, Mask)) { @@ -527,7 +527,7 @@ Instruction *InstCombiner::commonRemTransforms(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); if (isa<UndefValue>(Op0)) { // undef % X -> 0 - if (I.getType()->isFPOrFPVector()) + if (I.getType()->isFPOrFPVectorTy()) return ReplaceInstUsesWith(I, Op0); // X % undef -> undef (could be SNaN) return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); } @@ -648,7 +648,7 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) { // If the sign bits of both operands are zero (i.e. we can prove they are // unsigned inputs), turn this into a urem. - if (I.getType()->isInteger()) { + if (I.getType()->isIntegerTy()) { APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())); if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) { // X srem Y -> X urem Y, iff X and Y don't have sign bit set diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index 18b2dff..7807d9a 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -326,44 +326,6 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, break; } } - - // (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if signed - // (x >s -1) ? -1 : 0 -> ashr x, 31 -> all ones if not signed - CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE; - if (match(TrueVal, m_ConstantInt<-1>()) && - match(FalseVal, m_ConstantInt<0>())) - Pred = ICI->getPredicate(); - else if (match(TrueVal, m_ConstantInt<0>()) && - match(FalseVal, m_ConstantInt<-1>())) - Pred = CmpInst::getInversePredicate(ICI->getPredicate()); - - if (Pred != CmpInst::BAD_ICMP_PREDICATE) { - // If we are just checking for a icmp eq of a single bit and zext'ing it - // to an integer, then shift the bit to the appropriate place and then - // cast to integer to avoid the comparison. - const APInt &Op1CV = CI->getValue(); - - // sext (x <s 0) to i32 --> x>>s31 true if signbit set. - // sext (x >s -1) to i32 --> (x>>s31)^-1 true if signbit clear. 
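
// --- Illustrative aside, not part of the patch: the sdiv->udiv and
// srem->urem folds above. When the sign bits of both operands are known to
// be zero, signed and unsigned division and remainder agree. Standalone
// check over a few non-negative samples:
#include <cassert>
#include <cstdint>

int main() {
  for (int32_t A : {0, 1, 7, 100, INT32_MAX})
    for (int32_t B : {1, 2, 3, 1000, INT32_MAX}) {
      uint32_t UA = static_cast<uint32_t>(A), UB = static_cast<uint32_t>(B);
      assert(static_cast<uint32_t>(A / B) == UA / UB);  // X sdiv Y == X udiv Y
      assert(static_cast<uint32_t>(A % B) == UA % UB);  // X srem Y == X urem Y
    }
  return 0;
}
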
- if ((Pred == ICmpInst::ICMP_SLT && Op1CV == 0) || - (Pred == ICmpInst::ICMP_SGT && Op1CV.isAllOnesValue())) { - Value *In = ICI->getOperand(0); - Value *Sh = ConstantInt::get(In->getType(), - In->getType()->getScalarSizeInBits()-1); - In = InsertNewInstBefore(BinaryOperator::CreateAShr(In, Sh, - In->getName()+".lobit"), - *ICI); - if (In->getType() != SI.getType()) - In = CastInst::CreateIntegerCast(In, SI.getType(), - true/*SExt*/, "tmp", ICI); - - if (Pred == ICmpInst::ICMP_SGT) - In = InsertNewInstBefore(BinaryOperator::CreateNot(In, - In->getName()+".not"), *ICI); - - return ReplaceInstUsesWith(SI, In); - } - } } if (CmpLHS == TrueVal && CmpRHS == FalseVal) { @@ -479,7 +441,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { return ReplaceInstUsesWith(SI, FalseVal); } - if (SI.getType()->isInteger(1)) { + if (SI.getType()->isIntegerTy(1)) { if (ConstantInt *C = dyn_cast<ConstantInt>(TrueVal)) { if (C->getZExtValue()) { // Change: A = select B, true, C --> A = or B, C @@ -516,16 +478,25 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { if (ConstantInt *TrueValC = dyn_cast<ConstantInt>(TrueVal)) if (ConstantInt *FalseValC = dyn_cast<ConstantInt>(FalseVal)) { // select C, 1, 0 -> zext C to int - if (FalseValC->isZero() && TrueValC->getValue() == 1) { - return CastInst::Create(Instruction::ZExt, CondVal, SI.getType()); - } else if (TrueValC->isZero() && FalseValC->getValue() == 1) { - // select C, 0, 1 -> zext !C to int - Value *NotCond = - InsertNewInstBefore(BinaryOperator::CreateNot(CondVal, - "not."+CondVal->getName()), SI); - return CastInst::Create(Instruction::ZExt, NotCond, SI.getType()); + if (FalseValC->isZero() && TrueValC->getValue() == 1) + return new ZExtInst(CondVal, SI.getType()); + + // select C, -1, 0 -> sext C to int + if (FalseValC->isZero() && TrueValC->isAllOnesValue()) + return new SExtInst(CondVal, SI.getType()); + + // select C, 0, 1 -> zext !C to int + if (TrueValC->isZero() && FalseValC->getValue() == 1) { + Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName()); + return new ZExtInst(NotCond, SI.getType()); } + // select C, 0, -1 -> sext !C to int + if (TrueValC->isZero() && FalseValC->isAllOnesValue()) { + Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName()); + return new SExtInst(NotCond, SI.getType()); + } + if (ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition())) { // If one of the constants is zero (we know they can't both be) and we // have an icmp instruction with zero, and we have an 'and' with the @@ -547,8 +518,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { ShouldNotVal ^= IC->getPredicate() == ICmpInst::ICMP_NE; Value *V = ICA; if (ShouldNotVal) - V = InsertNewInstBefore(BinaryOperator::Create( - Instruction::Xor, V, ICA->getOperand(1)), SI); + V = Builder->CreateXor(V, ICA->getOperand(1)); return ReplaceInstUsesWith(SI, V); } } @@ -659,7 +629,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { } // See if we can fold the select into one of our operands. 
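
// --- Illustrative aside, not part of the patch: the constant-select folds
// above. With a boolean c, (c ? 1 : 0) is zext(c), (c ? -1 : 0) is sext(c),
// and the swapped forms are the same casts of !c. Standalone check:
#include <cassert>
#include <cstdint>

int main() {
  for (bool c : {false, true}) {
    assert((c ? 1 : 0) == static_cast<int32_t>(c));      // select C, 1, 0
    assert((c ? -1 : 0) == -static_cast<int32_t>(c));    // select C, -1, 0
    assert((c ? 0 : 1) == static_cast<int32_t>(!c));     // select C, 0, 1
    assert((c ? 0 : -1) == -static_cast<int32_t>(!c));   // select C, 0, -1
  }
  return 0;
}
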
- if (SI.getType()->isInteger()) { + if (SI.getType()->isIntegerTy()) { if (Instruction *FoldI = FoldSelectIntoOp(SI, TrueVal, FalseVal)) return FoldI; diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index 321c91d..836bda3 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "InstCombine.h" +#include "llvm/IntrinsicInst.h" #include "llvm/Support/PatternMatch.h" using namespace llvm; using namespace PatternMatch; @@ -69,10 +70,9 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, if (Op1->uge(TypeBits)) { if (I.getOpcode() != Instruction::AShr) return ReplaceInstUsesWith(I, Constant::getNullValue(Op0->getType())); - else { - I.setOperand(1, ConstantInt::get(I.getType(), TypeBits-1)); - return &I; - } + // ashr i32 X, 32 --> ashr i32 X, 31 + I.setOperand(1, ConstantInt::get(I.getType(), TypeBits-1)); + return &I; } // ((X*C1) << C2) == (X * (C1 << C2)) @@ -387,7 +387,29 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) { } Instruction *InstCombiner::visitLShr(BinaryOperator &I) { - return commonShiftTransforms(I); + if (Instruction *R = commonShiftTransforms(I)) + return R; + + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Op0)) { + unsigned BitWidth = Op0->getType()->getScalarSizeInBits(); + // ctlz.i32(x)>>5 --> zext(x == 0) + // cttz.i32(x)>>5 --> zext(x == 0) + // ctpop.i32(x)>>5 --> zext(x == -1) + if ((II->getIntrinsicID() == Intrinsic::ctlz || + II->getIntrinsicID() == Intrinsic::cttz || + II->getIntrinsicID() == Intrinsic::ctpop) && + isPowerOf2_32(BitWidth) && Log2_32(BitWidth) == Op1C->getZExtValue()){ + bool isCtPop = II->getIntrinsicID() == Intrinsic::ctpop; + Constant *RHS = ConstantInt::getSigned(Op0->getType(), isCtPop ? -1:0); + Value *Cmp = Builder->CreateICmpEQ(II->getOperand(1), RHS); + return new ZExtInst(Cmp, II->getType()); + } + } + + return 0; } Instruction *InstCombiner::visitAShr(BinaryOperator &I) { diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 74a1b68..5e9a52f 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -107,7 +107,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, assert((TD || !isa<PointerType>(VTy)) && "SimplifyDemandedBits needs to know bit widths!"); assert((!TD || TD->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) && - (!VTy->isIntOrIntVector() || + (!VTy->isIntOrIntVectorTy() || VTy->getScalarSizeInBits() == BitWidth) && KnownZero.getBitWidth() == BitWidth && KnownOne.getBitWidth() == BitWidth && @@ -138,11 +138,11 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, return 0; APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); - APInt &RHSKnownZero = KnownZero, &RHSKnownOne = KnownOne; + APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); Instruction *I = dyn_cast<Instruction>(V); if (!I) { - ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth); + ComputeMaskedBits(V, DemandedMask, KnownZero, KnownOne, Depth); return 0; // Only analyze instructions. 
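
// --- Illustrative aside, not part of the patch: the new lshr folds above.
// ctpop(x) on an i32 is at most 32 and equals 32 only when x == -1, so
// ctpop(x) >> 5 is zext(x == -1); likewise ctlz(x) >> 5 is zext(x == 0)
// (and symmetrically for cttz). Standalone check using portable bit loops
// in place of the intrinsics:
#include <cassert>
#include <cstdint>

static unsigned ctpop32(uint32_t x) {
  unsigned n = 0;
  for (; x; x &= x - 1) ++n;     // clear lowest set bit per iteration
  return n;
}
static unsigned ctlz32(uint32_t x) {   // defined as 32 for x == 0 here
  unsigned n = 0;
  for (uint32_t bit = 0x80000000u; bit && !(x & bit); bit >>= 1) ++n;
  return n;
}

int main() {
  for (uint32_t x : {0u, 1u, 2u, 0x80000000u, 0xFFFFFFFEu, 0xFFFFFFFFu}) {
    assert((ctpop32(x) >> 5) == (x == 0xFFFFFFFFu ? 1u : 0u));
    assert((ctlz32(x) >> 5) == (x == 0u ? 1u : 0u));
  }
  return 0;
}
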
} @@ -219,7 +219,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, switch (I->getOpcode()) { default: - ComputeMaskedBits(I, DemandedMask, RHSKnownZero, RHSKnownOne, Depth); + ComputeMaskedBits(I, DemandedMask, KnownZero, KnownOne, Depth); break; case Instruction::And: // If either the LHS or the RHS are Zero, the result is zero. @@ -249,9 +249,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, return I; // Output known-1 bits are only known if set in both the LHS & RHS. - RHSKnownOne &= LHSKnownOne; + KnownOne = RHSKnownOne & LHSKnownOne; // Output known-0 are known to be clear if zero in either the LHS | RHS. - RHSKnownZero |= LHSKnownZero; + KnownZero = RHSKnownZero | LHSKnownZero; break; case Instruction::Or: // If either the LHS or the RHS are One, the result is One. @@ -286,9 +286,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, return I; // Output known-0 bits are only known if clear in both the LHS & RHS. - RHSKnownZero &= LHSKnownZero; + KnownZero = RHSKnownZero & LHSKnownZero; // Output known-1 are known to be set if set in either the LHS | RHS. - RHSKnownOne |= LHSKnownOne; + KnownOne = RHSKnownOne | LHSKnownOne; break; case Instruction::Xor: { if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, @@ -306,13 +306,6 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if ((DemandedMask & LHSKnownZero) == DemandedMask) return I->getOperand(1); - // Output known-0 bits are known if clear or set in both the LHS & RHS. - APInt KnownZeroOut = (RHSKnownZero & LHSKnownZero) | - (RHSKnownOne & LHSKnownOne); - // Output known-1 are known to be set if set in only one of the LHS, RHS. - APInt KnownOneOut = (RHSKnownZero & LHSKnownOne) | - (RHSKnownOne & LHSKnownZero); - // If all of the demanded bits are known to be zero on one side or the // other, turn this into an *inclusive* or. // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 @@ -368,10 +361,11 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, BinaryOperator::CreateXor(NewAnd, XorC, "tmp"); return InsertNewInstBefore(NewXor, *I); } - - - RHSKnownZero = KnownZeroOut; - RHSKnownOne = KnownOneOut; + + // Output known-0 bits are known if clear or set in both the LHS & RHS. + KnownZero= (RHSKnownZero & LHSKnownZero) | (RHSKnownOne & LHSKnownOne); + // Output known-1 are known to be set if set in only one of the LHS, RHS. + KnownOne = (RHSKnownZero & LHSKnownOne) | (RHSKnownOne & LHSKnownZero); break; } case Instruction::Select: @@ -389,61 +383,61 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, return I; // Only known if known in both the LHS and RHS. 
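
// --- Illustrative aside, not part of the patch: the known-bits rules being
// renamed above. For AND, a result bit is known one only if known one on both
// sides and known zero if known zero on either side; OR is the dual. A small
// property check over all 4-bit values consistent with some hypothetical
// known-bit masks:
#include <cassert>
#include <cstdint>

int main() {
  // Hypothetical facts: in x, bit0 is known one and bit3 known zero;
  // in y, bits 0 and 1 are known one.
  const uint8_t XOne = 0x1, XZero = 0x8, YOne = 0x3, YZero = 0x0;
  const uint8_t AndOne = XOne & YOne;        // KnownOne = RHSKnownOne & LHSKnownOne
  const uint8_t AndZero = XZero | YZero;     // KnownZero = RHSKnownZero | LHSKnownZero
  for (uint8_t x = 0; x < 16; ++x)
    for (uint8_t y = 0; y < 16; ++y) {
      if ((x & XOne) != XOne || (x & XZero) ||
          (y & YOne) != YOne || (y & YZero))
        continue;                            // skip values violating the facts
      uint8_t r = x & y;
      assert((r & AndOne) == AndOne);        // predicted ones are set
      assert((r & AndZero) == 0);            // predicted zeros are clear
    }
  return 0;
}
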
- RHSKnownOne &= LHSKnownOne; - RHSKnownZero &= LHSKnownZero; + KnownOne = RHSKnownOne & LHSKnownOne; + KnownZero = RHSKnownZero & LHSKnownZero; break; case Instruction::Trunc: { unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits(); DemandedMask.zext(truncBf); - RHSKnownZero.zext(truncBf); - RHSKnownOne.zext(truncBf); + KnownZero.zext(truncBf); + KnownOne.zext(truncBf); if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, - RHSKnownZero, RHSKnownOne, Depth+1)) + KnownZero, KnownOne, Depth+1)) return I; DemandedMask.trunc(BitWidth); - RHSKnownZero.trunc(BitWidth); - RHSKnownOne.trunc(BitWidth); - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); + KnownZero.trunc(BitWidth); + KnownOne.trunc(BitWidth); + assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); break; } case Instruction::BitCast: - if (!I->getOperand(0)->getType()->isIntOrIntVector()) - return false; // vector->int or fp->int? + if (!I->getOperand(0)->getType()->isIntOrIntVectorTy()) + return 0; // vector->int or fp->int? if (const VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) { if (const VectorType *SrcVTy = dyn_cast<VectorType>(I->getOperand(0)->getType())) { if (DstVTy->getNumElements() != SrcVTy->getNumElements()) // Don't touch a bitcast between vectors of different element counts. - return false; + return 0; } else // Don't touch a scalar-to-vector bitcast. - return false; + return 0; } else if (isa<VectorType>(I->getOperand(0)->getType())) // Don't touch a vector-to-scalar bitcast. - return false; + return 0; if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, - RHSKnownZero, RHSKnownOne, Depth+1)) + KnownZero, KnownOne, Depth+1)) return I; - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); + assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); break; case Instruction::ZExt: { // Compute the bits in the result that are not present in the input. unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits(); DemandedMask.trunc(SrcBitWidth); - RHSKnownZero.trunc(SrcBitWidth); - RHSKnownOne.trunc(SrcBitWidth); + KnownZero.trunc(SrcBitWidth); + KnownOne.trunc(SrcBitWidth); if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, - RHSKnownZero, RHSKnownOne, Depth+1)) + KnownZero, KnownOne, Depth+1)) return I; DemandedMask.zext(BitWidth); - RHSKnownZero.zext(BitWidth); - RHSKnownOne.zext(BitWidth); - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); + KnownZero.zext(BitWidth); + KnownOne.zext(BitWidth); + assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); // The top bits are known to be zero. 
- RHSKnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); break; } case Instruction::SExt: { @@ -460,27 +454,27 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, InputDemandedBits.set(SrcBitWidth-1); InputDemandedBits.trunc(SrcBitWidth); - RHSKnownZero.trunc(SrcBitWidth); - RHSKnownOne.trunc(SrcBitWidth); + KnownZero.trunc(SrcBitWidth); + KnownOne.trunc(SrcBitWidth); if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits, - RHSKnownZero, RHSKnownOne, Depth+1)) + KnownZero, KnownOne, Depth+1)) return I; InputDemandedBits.zext(BitWidth); - RHSKnownZero.zext(BitWidth); - RHSKnownOne.zext(BitWidth); - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); + KnownZero.zext(BitWidth); + KnownOne.zext(BitWidth); + assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); // If the sign bit of the input is known set or clear, then we know the // top bits of the result. // If the input sign bit is known zero, or if the NewBits are not demanded // convert this into a zero extension. - if (RHSKnownZero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) { + if (KnownZero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) { // Convert to ZExt cast CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName()); return InsertNewInstBefore(NewCast, *I); - } else if (RHSKnownOne[SrcBitWidth-1]) { // Input sign bit known set - RHSKnownOne |= NewBits; + } else if (KnownOne[SrcBitWidth-1]) { // Input sign bit known set + KnownOne |= NewBits; } break; } @@ -540,12 +534,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // Bits are known one if they are known zero in one operand and one in the // other, and there is no input carry. - RHSKnownOne = ((LHSKnownZero & RHSVal) | - (LHSKnownOne & ~RHSVal)) & ~CarryBits; + KnownOne = ((LHSKnownZero & RHSVal) | + (LHSKnownOne & ~RHSVal)) & ~CarryBits; // Bits are known zero if they are known zero in both operands and there // is no input carry. - RHSKnownZero = LHSKnownZero & ~RHSVal & ~CarryBits; + KnownZero = LHSKnownZero & ~RHSVal & ~CarryBits; } else { // If the high-bits of this ADD are not demanded, then it does not demand // the high bits of its LHS or RHS. @@ -578,21 +572,21 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, } // Otherwise just hand the sub off to ComputeMaskedBits to fill in // the known zeros and ones. - ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth); + ComputeMaskedBits(V, DemandedMask, KnownZero, KnownOne, Depth); break; case Instruction::Shl: if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt)); if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, - RHSKnownZero, RHSKnownOne, Depth+1)) + KnownZero, KnownOne, Depth+1)) return I; - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - RHSKnownZero <<= ShiftAmt; - RHSKnownOne <<= ShiftAmt; + assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); + KnownZero <<= ShiftAmt; + KnownOne <<= ShiftAmt; // low bits known zero. 
if (ShiftAmt) - RHSKnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); + KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); } break; case Instruction::LShr: @@ -603,15 +597,15 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // Unsigned shift right. APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt)); if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, - RHSKnownZero, RHSKnownOne, Depth+1)) + KnownZero, KnownOne, Depth+1)) return I; - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - RHSKnownZero = APIntOps::lshr(RHSKnownZero, ShiftAmt); - RHSKnownOne = APIntOps::lshr(RHSKnownOne, ShiftAmt); + assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); + KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); + KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); if (ShiftAmt) { // Compute the new bits that are at the top now. APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt)); - RHSKnownZero |= HighBits; // high bits known zero. + KnownZero |= HighBits; // high bits known zero. } } break; @@ -642,13 +636,13 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (DemandedMask.countLeadingZeros() <= ShiftAmt) DemandedMaskIn.set(BitWidth-1); if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, - RHSKnownZero, RHSKnownOne, Depth+1)) + KnownZero, KnownOne, Depth+1)) return I; - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); + assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); // Compute the new bits that are at the top now. APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt)); - RHSKnownZero = APIntOps::lshr(RHSKnownZero, ShiftAmt); - RHSKnownOne = APIntOps::lshr(RHSKnownOne, ShiftAmt); + KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); + KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); // Handle the sign bits. APInt SignBit(APInt::getSignBit(BitWidth)); @@ -657,14 +651,14 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If the input sign bit is known to be zero, or if none of the top bits // are demanded, turn this into an unsigned shift right. - if (BitWidth <= ShiftAmt || RHSKnownZero[BitWidth-ShiftAmt-1] || + if (BitWidth <= ShiftAmt || KnownZero[BitWidth-ShiftAmt-1] || (HighBits & ~DemandedMask) == HighBits) { // Perform the logical shift right. Instruction *NewVal = BinaryOperator::CreateLShr( I->getOperand(0), SA, I->getName()); return InsertNewInstBefore(NewVal, *I); - } else if ((RHSKnownOne & SignBit) != 0) { // New bits are known one. - RHSKnownOne |= HighBits; + } else if ((KnownOne & SignBit) != 0) { // New bits are known one. + KnownOne |= HighBits; } } break; @@ -681,10 +675,19 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, LHSKnownZero, LHSKnownOne, Depth+1)) return I; + // The low bits of LHS are unchanged by the srem. + KnownZero = LHSKnownZero & LowBits; + KnownOne = LHSKnownOne & LowBits; + + // If LHS is non-negative or has all low bits zero, then the upper bits + // are all zero. if (LHSKnownZero[BitWidth-1] || ((LHSKnownZero & LowBits) == LowBits)) - LHSKnownZero |= ~LowBits; + KnownZero |= ~LowBits; - KnownZero |= LHSKnownZero & DemandedMask; + // If LHS is negative and not all low bits are zero, then the upper bits + // are all one. 
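
// --- Illustrative aside, not part of the patch: the srem known-bits facts
// above, specialized here to a power-of-two divisor. x % 8 differs from x by
// a multiple of 8, so the low three bits pass through; the remaining bits are
// all zero when x is non-negative (or the low part is zero) and all one when
// x is negative with a nonzero low part. Assumes two's-complement int
// (guaranteed since C++20); C++ '%' truncates toward zero like srem.
#include <cassert>
#include <cstdint>

int main() {
  const int32_t LowBits = 7;                       // divisor 8, mask 0b111
  for (int32_t x = -40; x <= 40; ++x) {
    int32_t r = x % 8;
    assert((r & LowBits) == (x & LowBits));        // low bits unchanged
    if (x >= 0 || (x & LowBits) == 0)
      assert((r & ~LowBits) == 0);                 // upper bits all zero
    else
      assert((r & ~LowBits) == ~LowBits);          // upper bits all one
  }
  return 0;
}
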
+ if (LHSKnownOne[BitWidth-1] && ((LHSKnownOne & LowBits) != 0)) + KnownOne |= ~LowBits; assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); } @@ -743,15 +746,15 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, } } } - ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth); + ComputeMaskedBits(V, DemandedMask, KnownZero, KnownOne, Depth); break; } // If the client is only demanding bits that we know, return the known // constant. - if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) - return Constant::getIntegerValue(VTy, RHSKnownOne); - return false; + if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask) + return Constant::getIntegerValue(VTy, KnownOne); + return 0; } @@ -764,7 +767,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, /// operation, the operation is simplified, then the resultant value is /// returned. This returns null if no change was made. Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, - APInt& UndefElts, + APInt &UndefElts, unsigned Depth) { unsigned VWidth = cast<VectorType>(V->getType())->getNumElements(); APInt EltMask(APInt::getAllOnesValue(VWidth)); @@ -774,13 +777,15 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, // If the entire vector is undefined, just return this info. UndefElts = EltMask; return 0; - } else if (DemandedElts == 0) { // If nothing is demanded, provide undef. + } + + if (DemandedElts == 0) { // If nothing is demanded, provide undef. UndefElts = EltMask; return UndefValue::get(V->getType()); } UndefElts = 0; - if (ConstantVector *CP = dyn_cast<ConstantVector>(V)) { + if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) { const Type *EltTy = cast<VectorType>(V->getType())->getElementType(); Constant *Undef = UndefValue::get(EltTy); @@ -789,23 +794,25 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, if (!DemandedElts[i]) { // If not demanded, set to undef. Elts.push_back(Undef); UndefElts.set(i); - } else if (isa<UndefValue>(CP->getOperand(i))) { // Already undef. + } else if (isa<UndefValue>(CV->getOperand(i))) { // Already undef. Elts.push_back(Undef); UndefElts.set(i); } else { // Otherwise, defined. - Elts.push_back(CP->getOperand(i)); + Elts.push_back(CV->getOperand(i)); } // If we changed the constant, return it. Constant *NewCP = ConstantVector::get(Elts); - return NewCP != CP ? NewCP : 0; - } else if (isa<ConstantAggregateZero>(V)) { + return NewCP != CV ? NewCP : 0; + } + + if (isa<ConstantAggregateZero>(V)) { // Simplify the CAZ to a ConstantVector where the non-demanded elements are // set to undef. // Check if this is identity. If so, return 0 since we are not simplifying // anything. - if (DemandedElts == ((1ULL << VWidth) -1)) + if (DemandedElts.isAllOnesValue()) return 0; const Type *EltTy = cast<VectorType>(V->getType())->getElementType(); diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index f11f557..20fda1a 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -162,7 +162,8 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { // property. 
if (EI.getOperand(0)->hasOneUse() && VectorWidth != 1) { APInt UndefElts(VectorWidth, 0); - APInt DemandedMask(VectorWidth, 1 << IndexVal); + APInt DemandedMask(VectorWidth, 0); + DemandedMask.set(IndexVal); if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0), DemandedMask, UndefElts)) { EI.setOperand(0, V); diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 93b1961..96c0342 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -158,7 +158,7 @@ Value *InstCombiner::dyn_castNegVal(Value *V) const { return ConstantExpr::getNeg(C); if (ConstantVector *C = dyn_cast<ConstantVector>(V)) - if (C->getType()->getElementType()->isInteger()) + if (C->getType()->getElementType()->isIntegerTy()) return ConstantExpr::getNeg(C); return 0; @@ -177,7 +177,7 @@ Value *InstCombiner::dyn_castFNegVal(Value *V) const { return ConstantExpr::getFNeg(C); if (ConstantVector *C = dyn_cast<ConstantVector>(V)) - if (C->getType()->getElementType()->isFloatingPoint()) + if (C->getType()->getElementType()->isFloatingPointTy()) return ConstantExpr::getFNeg(C); return 0; @@ -226,7 +226,7 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) { if (isa<Constant>(TV) || isa<Constant>(FV)) { // Bool selects with constant operands can be folded to logical ops. - if (SI->getType()->isInteger(1)) return 0; + if (SI->getType()->isIntegerTy(1)) return 0; Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, this); Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, this); @@ -596,7 +596,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // (where tmp = 8*tmp2) into: // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast - if (TD && isa<ArrayType>(SrcElTy) && ResElTy->isInteger(8)) { + if (TD && isa<ArrayType>(SrcElTy) && ResElTy->isIntegerTy(8)) { uint64_t ArrayEltSize = TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()); diff --git a/lib/Transforms/InstCombine/Makefile b/lib/Transforms/InstCombine/Makefile index f9de42a..0c488e78 100644 --- a/lib/Transforms/InstCombine/Makefile +++ b/lib/Transforms/InstCombine/Makefile @@ -10,7 +10,6 @@ LEVEL = ../../.. LIBRARYNAME = LLVMInstCombine BUILD_ARCHIVE = 1 -CXXFLAGS = -fno-rtti include $(LEVEL)/Makefile.common diff --git a/lib/Transforms/Instrumentation/Makefile b/lib/Transforms/Instrumentation/Makefile index 1238896..6cbc7a9 100644 --- a/lib/Transforms/Instrumentation/Makefile +++ b/lib/Transforms/Instrumentation/Makefile @@ -10,7 +10,6 @@ LEVEL = ../../.. LIBRARYNAME = LLVMInstrumentation BUILD_ARCHIVE = 1 -CXXFLAGS = -fno-rtti include $(LEVEL)/Makefile.common diff --git a/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/lib/Transforms/Instrumentation/ProfilingUtils.cpp index 3214c8c..8662a82 100644 --- a/lib/Transforms/Instrumentation/ProfilingUtils.cpp +++ b/lib/Transforms/Instrumentation/ProfilingUtils.cpp @@ -84,7 +84,7 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName, AI = MainFn->arg_begin(); // If the program looked at argc, have it look at the return value of the // init call instead. 
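
// --- Illustrative aside, not part of the patch: why the extractelement code
// above switched from "1 << IndexVal" to DemandedMask.set(IndexVal). Shifting
// a 32-bit 1 by an element index of 32 or more is undefined and drops the
// bit, while setting the bit in a mask as wide as the vector is always fine.
// A hypothetical standalone analogue using a 64-bit mask for a 64-wide vector:
#include <cassert>
#include <cstdint>

int main() {
  const unsigned IndexVal = 35;              // element index >= 32
  uint64_t DemandedMask = 0;                 // cf. APInt(VectorWidth, 0)
  DemandedMask |= uint64_t{1} << IndexVal;   // cf. DemandedMask.set(IndexVal)
  assert(DemandedMask == 0x0000000800000000ULL);
  // By contrast, (1 << IndexVal) would shift a 32-bit int by 35 bits,
  // which is undefined behavior in C++ and cannot represent the bit anyway.
  return 0;
}
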
- if (!AI->getType()->isInteger(32)) { + if (!AI->getType()->isIntegerTy(32)) { Instruction::CastOps opcode; if (!AI->use_empty()) { opcode = CastInst::getCastOpcode(InitCall, true, AI->getType(), true); diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index c3139a5..21e6f89 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -32,7 +32,6 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/Assembly/Writer.h" #include "llvm/Support/CallSite.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/PatternMatch.h" @@ -40,9 +39,6 @@ using namespace llvm; using namespace llvm::PatternMatch; -static cl::opt<bool> FactorCommonPreds("split-critical-paths-tweak", - cl::init(false), cl::Hidden); - namespace { class CodeGenPrepare : public FunctionPass { /// TLI - Keep a pointer of a TargetLowering to consult for determining @@ -63,6 +59,10 @@ namespace { AU.addPreserved<ProfileInfo>(); } + virtual void releaseMemory() { + BackEdges.clear(); + } + private: bool EliminateMostlyEmptyBlocks(Function &F); bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const; @@ -297,6 +297,70 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); } +/// FindReusablePredBB - Check all of the predecessors of the block DestPHI +/// lives in to see if there is a block that we can reuse as a critical edge +/// from TIBB. +static BasicBlock *FindReusablePredBB(PHINode *DestPHI, BasicBlock *TIBB) { + BasicBlock *Dest = DestPHI->getParent(); + + /// TIPHIValues - This array is lazily computed to determine the values of + /// PHIs in Dest that TI would provide. + SmallVector<Value*, 32> TIPHIValues; + + /// TIBBEntryNo - This is a cache to speed up pred queries for TIBB. + unsigned TIBBEntryNo = 0; + + // Check to see if Dest has any blocks that can be used as a split edge for + // this terminator. + for (unsigned pi = 0, e = DestPHI->getNumIncomingValues(); pi != e; ++pi) { + BasicBlock *Pred = DestPHI->getIncomingBlock(pi); + // To be usable, the pred has to end with an uncond branch to the dest. + BranchInst *PredBr = dyn_cast<BranchInst>(Pred->getTerminator()); + if (!PredBr || !PredBr->isUnconditional()) + continue; + // Must be empty other than the branch and debug info. + BasicBlock::iterator I = Pred->begin(); + while (isa<DbgInfoIntrinsic>(I)) + I++; + if (&*I != PredBr) + continue; + // Cannot be the entry block; its label does not get emitted. + if (Pred == &Dest->getParent()->getEntryBlock()) + continue; + + // Finally, since we know that Dest has phi nodes in it, we have to make + // sure that jumping to Pred will have the same effect as going to Dest in + // terms of PHI values. + PHINode *PN; + unsigned PHINo = 0; + unsigned PredEntryNo = pi; + + bool FoundMatch = true; + for (BasicBlock::iterator I = Dest->begin(); + (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo) { + if (PHINo == TIPHIValues.size()) { + if (PN->getIncomingBlock(TIBBEntryNo) != TIBB) + TIBBEntryNo = PN->getBasicBlockIndex(TIBB); + TIPHIValues.push_back(PN->getIncomingValue(TIBBEntryNo)); + } + + // If the PHI entry doesn't work, we can't use this pred. 
+ if (PN->getIncomingBlock(PredEntryNo) != Pred) + PredEntryNo = PN->getBasicBlockIndex(Pred); + + if (TIPHIValues[PHINo] != PN->getIncomingValue(PredEntryNo)) { + FoundMatch = false; + break; + } + } + + // If we found a workable predecessor, change TI to branch to Succ. + if (FoundMatch) + return Pred; + } + return 0; +} + /// SplitEdgeNicely - Split the critical edge from TI to its specified /// successor if it will improve codegen. We only do this if the successor has @@ -311,13 +375,12 @@ static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum, BasicBlock *Dest = TI->getSuccessor(SuccNum); assert(isa<PHINode>(Dest->begin()) && "This should only be called if Dest has a PHI!"); + PHINode *DestPHI = cast<PHINode>(Dest->begin()); // Do not split edges to EH landing pads. - if (InvokeInst *Invoke = dyn_cast<InvokeInst>(TI)) { + if (InvokeInst *Invoke = dyn_cast<InvokeInst>(TI)) if (Invoke->getSuccessor(1) == Dest) return; - } - // As a hack, never split backedges of loops. Even though the copy for any // PHIs inserted on the backedge would be dead for exits from the loop, we @@ -325,92 +388,16 @@ static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum, if (BackEdges.count(std::make_pair(TIBB, Dest))) return; - if (!FactorCommonPreds) { - /// TIPHIValues - This array is lazily computed to determine the values of - /// PHIs in Dest that TI would provide. - SmallVector<Value*, 32> TIPHIValues; - - // Check to see if Dest has any blocks that can be used as a split edge for - // this terminator. - for (pred_iterator PI = pred_begin(Dest), E = pred_end(Dest); PI != E; ++PI) { - BasicBlock *Pred = *PI; - // To be usable, the pred has to end with an uncond branch to the dest. - BranchInst *PredBr = dyn_cast<BranchInst>(Pred->getTerminator()); - if (!PredBr || !PredBr->isUnconditional()) - continue; - // Must be empty other than the branch and debug info. - BasicBlock::iterator I = Pred->begin(); - while (isa<DbgInfoIntrinsic>(I)) - I++; - if (dyn_cast<Instruction>(I) != PredBr) - continue; - // Cannot be the entry block; its label does not get emitted. - if (Pred == &(Dest->getParent()->getEntryBlock())) - continue; - - // Finally, since we know that Dest has phi nodes in it, we have to make - // sure that jumping to Pred will have the same effect as going to Dest in - // terms of PHI values. - PHINode *PN; - unsigned PHINo = 0; - bool FoundMatch = true; - for (BasicBlock::iterator I = Dest->begin(); - (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo) { - if (PHINo == TIPHIValues.size()) - TIPHIValues.push_back(PN->getIncomingValueForBlock(TIBB)); - - // If the PHI entry doesn't work, we can't use this pred. - if (TIPHIValues[PHINo] != PN->getIncomingValueForBlock(Pred)) { - FoundMatch = false; - break; - } - } - - // If we found a workable predecessor, change TI to branch to Succ. 
- if (FoundMatch) { - ProfileInfo *PFI = P->getAnalysisIfAvailable<ProfileInfo>(); - if (PFI) - PFI->splitEdge(TIBB, Dest, Pred); - Dest->removePredecessor(TIBB); - TI->setSuccessor(SuccNum, Pred); - return; - } - } - - SplitCriticalEdge(TI, SuccNum, P, true); + if (BasicBlock *ReuseBB = FindReusablePredBB(DestPHI, TIBB)) { + ProfileInfo *PFI = P->getAnalysisIfAvailable<ProfileInfo>(); + if (PFI) + PFI->splitEdge(TIBB, Dest, ReuseBB); + Dest->removePredecessor(TIBB); + TI->setSuccessor(SuccNum, ReuseBB); return; } - PHINode *PN; - SmallVector<Value*, 8> TIPHIValues; - for (BasicBlock::iterator I = Dest->begin(); - (PN = dyn_cast<PHINode>(I)); ++I) - TIPHIValues.push_back(PN->getIncomingValueForBlock(TIBB)); - - SmallVector<BasicBlock*, 8> IdenticalPreds; - for (pred_iterator PI = pred_begin(Dest), E = pred_end(Dest); PI != E; ++PI) { - BasicBlock *Pred = *PI; - if (BackEdges.count(std::make_pair(Pred, Dest))) - continue; - if (PI == TIBB) - IdenticalPreds.push_back(Pred); - else { - bool Identical = true; - unsigned PHINo = 0; - for (BasicBlock::iterator I = Dest->begin(); - (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo) - if (TIPHIValues[PHINo] != PN->getIncomingValueForBlock(Pred)) { - Identical = false; - break; - } - if (Identical) - IdenticalPreds.push_back(Pred); - } - } - - assert(!IdenticalPreds.empty()); - SplitBlockPredecessors(Dest, &IdenticalPreds[0], IdenticalPreds.size(), - ".critedge", P); + SplitCriticalEdge(TI, SuccNum, P, true); } diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index 320afa1..09c01d3 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -44,8 +44,14 @@ namespace { virtual bool runOnFunction(Function &F) { bool Changed = false; + + DominatorTree &DT = getAnalysis<DominatorTree>(); + for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) - Changed |= runOnBasicBlock(*I); + // Only check non-dead blocks. Dead blocks may have strange pointer + // cycles that will confuse alias analysis. + if (DT.isReachableFromEntry(I)) + Changed |= runOnBasicBlock(*I); return Changed; } diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index b29fe74..3ce7482 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -60,6 +60,7 @@ STATISTIC(NumPRELoad, "Number of loads PRE'd"); static cl::opt<bool> EnablePRE("enable-pre", cl::init(true), cl::Hidden); static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true)); +static cl::opt<bool> EnableFullLoadPRE("enable-full-load-pre", cl::init(false)); //===----------------------------------------------------------------------===// // ValueTable Class @@ -1522,8 +1523,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI, while (TmpBB->getSinglePredecessor()) { isSinglePred = true; TmpBB = TmpBB->getSinglePredecessor(); - if (!TmpBB) // If haven't found any, bail now. - return false; if (TmpBB == LoadBB) // Infinite (unreachable) loop. return false; if (Blockers.count(TmpBB)) @@ -1539,10 +1538,12 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // at least one of the values is LI. Since this means that we won't be able // to eliminate LI even if we insert uses in the other predecessors, we will // end up increasing code size. Reject this by scanning for LI. 
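// For example, if predecessor P1 has a real available value but P2's
// "available value" is LI itself, inserting a load into P2 still leaves a
// load on the P2 path, so LI is not eliminated and code only grows.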
- for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) - if (ValuesPerBlock[i].isSimpleValue() && - ValuesPerBlock[i].getSimpleValue() == LI) - return false; + if (!EnableFullLoadPRE) { + for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) + if (ValuesPerBlock[i].isSimpleValue() && + ValuesPerBlock[i].getSimpleValue() == LI) + return false; + } // FIXME: It is extremely unclear what this loop is doing, other than // artificially restricting loadpre. @@ -1566,13 +1567,9 @@ bool GVN::processNonLocalLoad(LoadInst *LI, return false; } - // Okay, we have some hope :). Check to see if the loaded value is fully - // available in all but one predecessor. - // FIXME: If we could restructure the CFG, we could make a common pred with - // all the preds that don't have an available LI and insert a new load into - // that one block. - BasicBlock *UnavailablePred = 0; - + // Check to see how many predecessors have the loaded value fully + // available. + DenseMap<BasicBlock*, Value*> PredLoads; DenseMap<BasicBlock*, char> FullyAvailableBlocks; for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) FullyAvailableBlocks[ValuesPerBlock[i].BB] = true; @@ -1581,79 +1578,93 @@ bool GVN::processNonLocalLoad(LoadInst *LI, for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB); PI != E; ++PI) { - if (IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks)) + BasicBlock *Pred = *PI; + if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks)) { continue; - - // If this load is not available in multiple predecessors, reject it. - if (UnavailablePred && UnavailablePred != *PI) + } + PredLoads[Pred] = 0; + // We don't currently handle critical edges :( + if (Pred->getTerminator()->getNumSuccessors() != 1) { + DEBUG(dbgs() << "COULD NOT PRE LOAD BECAUSE OF CRITICAL EDGE '" + << Pred->getName() << "': " << *LI << '\n'); return false; - UnavailablePred = *PI; + } } - assert(UnavailablePred != 0 && + // Decide whether PRE is profitable for this load. + unsigned NumUnavailablePreds = PredLoads.size(); + assert(NumUnavailablePreds != 0 && "Fully available value should be eliminated above!"); - - // We don't currently handle critical edges :( - if (UnavailablePred->getTerminator()->getNumSuccessors() != 1) { - DEBUG(dbgs() << "COULD NOT PRE LOAD BECAUSE OF CRITICAL EDGE '" - << UnavailablePred->getName() << "': " << *LI << '\n'); - return false; + if (!EnableFullLoadPRE) { + // If this load is unavailable in multiple predecessors, reject it. + // FIXME: If we could restructure the CFG, we could make a common pred with + // all the preds that don't have an available LI and insert a new load into + // that one block. + if (NumUnavailablePreds != 1) + return false; } - - // Do PHI translation to get its value in the predecessor if necessary. The - // returned pointer (if non-null) is guaranteed to dominate UnavailablePred. - // + + // Check if the load can safely be moved to all the unavailable predecessors. + bool CanDoPRE = true; SmallVector<Instruction*, 8> NewInsts; - - // If all preds have a single successor, then we know it is safe to insert the - // load on the pred (?!?), so we can insert code to materialize the pointer if - // it is not available. 
- PHITransAddr Address(LI->getOperand(0), TD); - Value *LoadPtr = 0; - if (allSingleSucc) { - LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred, - *DT, NewInsts); - } else { - Address.PHITranslateValue(LoadBB, UnavailablePred); - LoadPtr = Address.getAddr(); + for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(), + E = PredLoads.end(); I != E; ++I) { + BasicBlock *UnavailablePred = I->first; + + // Do PHI translation to get its value in the predecessor if necessary. The + // returned pointer (if non-null) is guaranteed to dominate UnavailablePred. + + // If all preds have a single successor, then we know it is safe to insert + // the load on the pred (?!?), so we can insert code to materialize the + // pointer if it is not available. + PHITransAddr Address(LI->getOperand(0), TD); + Value *LoadPtr = 0; + if (allSingleSucc) { + LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred, + *DT, NewInsts); + } else { + Address.PHITranslateValue(LoadBB, UnavailablePred); + LoadPtr = Address.getAddr(); - // Make sure the value is live in the predecessor. - if (Instruction *Inst = dyn_cast_or_null<Instruction>(LoadPtr)) - if (!DT->dominates(Inst->getParent(), UnavailablePred)) - LoadPtr = 0; - } + // Make sure the value is live in the predecessor. + if (Instruction *Inst = dyn_cast_or_null<Instruction>(LoadPtr)) + if (!DT->dominates(Inst->getParent(), UnavailablePred)) + LoadPtr = 0; + } - // If we couldn't find or insert a computation of this phi translated value, - // we fail PRE. - if (LoadPtr == 0) { - assert(NewInsts.empty() && "Shouldn't insert insts on failure"); - DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: " - << *LI->getOperand(0) << "\n"); - return false; - } + // If we couldn't find or insert a computation of this phi translated value, + // we fail PRE. + if (LoadPtr == 0) { + DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: " + << *LI->getOperand(0) << "\n"); + CanDoPRE = false; + break; + } - // Assign value numbers to these new instructions. - for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) { - // FIXME: We really _ought_ to insert these value numbers into their - // parent's availability map. However, in doing so, we risk getting into - // ordering issues. If a block hasn't been processed yet, we would be - // marking a value as AVAIL-IN, which isn't what we intend. - VN.lookup_or_add(NewInsts[i]); + // Make sure it is valid to move this load here. We have to watch out for: + // @1 = getelementptr (i8* p, ... + // test p and branch if == 0 + // load @1 + // It is valid to have the getelementptr before the test, even if p can be 0, + // as getelementptr only does address arithmetic. + // If we are not pushing the value through any multiple-successor blocks + // we do not have this case. Otherwise, check that the load is safe to + // put anywhere; this can be improved, but should be conservatively safe. + if (!allSingleSucc && + // FIXME: REEVALUTE THIS. + !isSafeToLoadUnconditionally(LoadPtr, + UnavailablePred->getTerminator(), + LI->getAlignment(), TD)) { + CanDoPRE = false; + break; + } + + I->second = LoadPtr; } - - // Make sure it is valid to move this load here. We have to watch out for: - // @1 = getelementptr (i8* p, ... - // test p and branch if == 0 - // load @1 - // It is valid to have the getelementptr before the test, even if p can be 0, - // as getelementptr only does address arithmetic. - // If we are not pushing the value through any multiple-successor blocks - // we do not have this case. 
Otherwise, check that the load is safe to - // put anywhere; this can be improved, but should be conservatively safe. - if (!allSingleSucc && - // FIXME: REEVALUTE THIS. - !isSafeToLoadUnconditionally(LoadPtr, UnavailablePred->getTerminator())) { - assert(NewInsts.empty() && "Should not have inserted instructions"); + + if (!CanDoPRE) { + while (!NewInsts.empty()) + NewInsts.pop_back_val()->eraseFromParent(); return false; } @@ -1665,12 +1676,28 @@ bool GVN::processNonLocalLoad(LoadInst *LI, dbgs() << "INSERTED " << NewInsts.size() << " INSTS: " << *NewInsts.back() << '\n'); - Value *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false, - LI->getAlignment(), - UnavailablePred->getTerminator()); + // Assign value numbers to the new instructions. + for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) { + // FIXME: We really _ought_ to insert these value numbers into their + // parent's availability map. However, in doing so, we risk getting into + // ordering issues. If a block hasn't been processed yet, we would be + // marking a value as AVAIL-IN, which isn't what we intend. + VN.lookup_or_add(NewInsts[i]); + } - // Add the newly created load. - ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,NewLoad)); + for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(), + E = PredLoads.end(); I != E; ++I) { + BasicBlock *UnavailablePred = I->first; + Value *LoadPtr = I->second; + + Value *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false, + LI->getAlignment(), + UnavailablePred->getTerminator()); + + // Add the newly created load. + ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred, + NewLoad)); + } // Perform PHI construction. Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT, @@ -1864,6 +1891,10 @@ Value *GVN::lookupNumber(BasicBlock *BB, uint32_t num) { /// by inserting it into the appropriate sets bool GVN::processInstruction(Instruction *I, SmallVectorImpl<Instruction*> &toErase) { + // Ignore dbg info intrinsics. + if (isa<DbgInfoIntrinsic>(I)) + return false; + if (LoadInst *LI = dyn_cast<LoadInst>(I)) { bool Changed = processLoad(LI, toErase); @@ -2075,7 +2106,7 @@ bool GVN::performPRE(Function &F) { for (pred_iterator PI = pred_begin(CurrentBlock), PE = pred_end(CurrentBlock); PI != PE; ++PI) { // We're not interested in PRE where the block is its - // own predecessor, on in blocks with predecessors + // own predecessor, or in blocks with predecessors // that are not reachable. if (*PI == CurrentBlock) { NumWithout = 2; @@ -2123,10 +2154,10 @@ bool GVN::performPRE(Function &F) { continue; } - // Instantiate the expression the in predecessor that lacked it. + // Instantiate the expression in the predecessor that lacked it. // Because we are going top-down through the block, all value numbers // will be available in the predecessor by the time we need them. Any - // that weren't original present will have been instantiated earlier + // that weren't originally present will have been instantiated earlier // in this loop. 
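    // (The expression is cloned into the predecessor rather than moved; once
    // it is available in every predecessor, the original instruction can be
    // replaced with a phi of the predecessor copies and erased.)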
Instruction *PREInstr = CurInst->clone(); bool success = true; diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 17f7d98..5302fdc 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -364,23 +364,17 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { if (ExitingBlock) NeedCannIV = true; } - for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) { - const SCEV *Stride = IU->StrideOrder[i]; - const Type *Ty = SE->getEffectiveSCEVType(Stride->getType()); + for (IVUsers::const_iterator I = IU->begin(), E = IU->end(); I != E; ++I) { + const Type *Ty = + SE->getEffectiveSCEVType(I->getOperandValToReplace()->getType()); if (!LargestType || SE->getTypeSizeInBits(Ty) > SE->getTypeSizeInBits(LargestType)) LargestType = Ty; - - std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = - IU->IVUsesByStride.find(IU->StrideOrder[i]); - assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); - - if (!SI->second->Users.empty()) - NeedCannIV = true; + NeedCannIV = true; } - // Now that we know the largest of of the induction variable expressions + // Now that we know the largest of the induction variable expressions // in this loop, insert a canonical induction variable of the largest size. Value *IndVar = 0; if (NeedCannIV) { @@ -455,72 +449,64 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType, // add the offsets to the primary induction variable and cast, avoiding // the need for the code evaluation methods to insert induction variables // of different sizes. - for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) { - const SCEV *Stride = IU->StrideOrder[i]; - - std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = - IU->IVUsesByStride.find(IU->StrideOrder[i]); - assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); - ilist<IVStrideUse> &List = SI->second->Users; - for (ilist<IVStrideUse>::iterator UI = List.begin(), - E = List.end(); UI != E; ++UI) { - Value *Op = UI->getOperandValToReplace(); - const Type *UseTy = Op->getType(); - Instruction *User = UI->getUser(); - - // Compute the final addrec to expand into code. - const SCEV *AR = IU->getReplacementExpr(*UI); - - // Evaluate the expression out of the loop, if possible. - if (!L->contains(UI->getUser())) { - const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop()); - if (ExitVal->isLoopInvariant(L)) - AR = ExitVal; - } + for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) { + const SCEV *Stride = UI->getStride(); + Value *Op = UI->getOperandValToReplace(); + const Type *UseTy = Op->getType(); + Instruction *User = UI->getUser(); + + // Compute the final addrec to expand into code. + const SCEV *AR = IU->getReplacementExpr(*UI); + + // Evaluate the expression out of the loop, if possible. + if (!L->contains(UI->getUser())) { + const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop()); + if (ExitVal->isLoopInvariant(L)) + AR = ExitVal; + } - // FIXME: It is an extremely bad idea to indvar substitute anything more - // complex than affine induction variables. Doing so will put expensive - // polynomial evaluations inside of the loop, and the str reduction pass - // currently can only reduce affine polynomials. For now just disable - // indvar subst on anything more complex than an affine addrec, unless - // it can be expanded to a trivial value. 
- if (!AR->isLoopInvariant(L) && !Stride->isLoopInvariant(L)) - continue; + // FIXME: It is an extremely bad idea to indvar substitute anything more + // complex than affine induction variables. Doing so will put expensive + // polynomial evaluations inside of the loop, and the str reduction pass + // currently can only reduce affine polynomials. For now just disable + // indvar subst on anything more complex than an affine addrec, unless + // it can be expanded to a trivial value. + if (!AR->isLoopInvariant(L) && !Stride->isLoopInvariant(L)) + continue; - // Determine the insertion point for this user. By default, insert - // immediately before the user. The SCEVExpander class will automatically - // hoist loop invariants out of the loop. For PHI nodes, there may be - // multiple uses, so compute the nearest common dominator for the - // incoming blocks. - Instruction *InsertPt = User; - if (PHINode *PHI = dyn_cast<PHINode>(InsertPt)) - for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) - if (PHI->getIncomingValue(i) == Op) { - if (InsertPt == User) - InsertPt = PHI->getIncomingBlock(i)->getTerminator(); - else - InsertPt = - DT->findNearestCommonDominator(InsertPt->getParent(), - PHI->getIncomingBlock(i)) - ->getTerminator(); - } - - // Now expand it into actual Instructions and patch it into place. - Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt); - - // Patch the new value into place. - if (Op->hasName()) - NewVal->takeName(Op); - User->replaceUsesOfWith(Op, NewVal); - UI->setOperandValToReplace(NewVal); - DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n' - << " into = " << *NewVal << "\n"); - ++NumRemoved; - Changed = true; - - // The old value may be dead now. - DeadInsts.push_back(Op); - } + // Determine the insertion point for this user. By default, insert + // immediately before the user. The SCEVExpander class will automatically + // hoist loop invariants out of the loop. For PHI nodes, there may be + // multiple uses, so compute the nearest common dominator for the + // incoming blocks. + Instruction *InsertPt = User; + if (PHINode *PHI = dyn_cast<PHINode>(InsertPt)) + for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) + if (PHI->getIncomingValue(i) == Op) { + if (InsertPt == User) + InsertPt = PHI->getIncomingBlock(i)->getTerminator(); + else + InsertPt = + DT->findNearestCommonDominator(InsertPt->getParent(), + PHI->getIncomingBlock(i)) + ->getTerminator(); + } + + // Now expand it into actual Instructions and patch it into place. + Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt); + + // Patch the new value into place. + if (Op->hasName()) + NewVal->takeName(Op); + User->replaceUsesOfWith(Op, NewVal); + UI->setOperandValToReplace(NewVal); + DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n' + << " into = " << *NewVal << "\n"); + ++NumRemoved; + Changed = true; + + // The old value may be dead now. + DeadInsts.push_back(Op); } // Clear the rewriter cache, because values that are in the rewriter's cache diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 9531311..8f21aac 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -336,13 +336,18 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ else InterestingVal = ConstantInt::getFalse(I->getContext()); - // Scan for the sentinel. + // Scan for the sentinel. 
If we find an undef, force it to the + // interesting value: x|undef -> true and x&undef -> false. for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) - if (LHSVals[i].first == InterestingVal || LHSVals[i].first == 0) + if (LHSVals[i].first == InterestingVal || LHSVals[i].first == 0) { Result.push_back(LHSVals[i]); + Result.back().first = InterestingVal; + } for (unsigned i = 0, e = RHSVals.size(); i != e; ++i) - if (RHSVals[i].first == InterestingVal || RHSVals[i].first == 0) + if (RHSVals[i].first == InterestingVal || RHSVals[i].first == 0) { Result.push_back(RHSVals[i]); + Result.back().first = InterestingVal; + } return !Result.empty(); } @@ -400,7 +405,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ // If comparing a live-in value against a constant, see if we know the // live-in value on any predecessors. if (LVI && isa<Constant>(Cmp->getOperand(1)) && - Cmp->getType()->isInteger() && // Not vector compare. + Cmp->getType()->isIntegerTy() && // Not vector compare. (!isa<Instruction>(Cmp->getOperand(0)) || cast<Instruction>(Cmp->getOperand(0))->getParent() != BB)) { Constant *RHSCst = cast<Constant>(Cmp->getOperand(1)); @@ -451,6 +456,12 @@ static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) { /// ProcessBlock - If there are any predecessors whose control can be threaded /// through to a successor, transform them now. bool JumpThreading::ProcessBlock(BasicBlock *BB) { + // If the block is trivially dead, just return and let the caller nuke it. + // This simplifies other transformations. + if (pred_begin(BB) == pred_end(BB) && + BB != &BB->getParent()->getEntryBlock()) + return false; + // If this block has a single predecessor, and if that pred has a single // successor, merge the blocks. This encourages recursive jump threading // because now the condition in this block can be threaded through @@ -1117,6 +1128,11 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) { isa<ConstantInt>(BO->getOperand(1))) return false; + // If the first instruction in BB isn't a phi, we won't be able to infer + // anything special about any particular predecessor. + if (!isa<PHINode>(BB->front())) + return false; + // If we have a xor as the branch input to this block, and we know that the // LHS or RHS of the xor in any predecessor is true/false, then we can clone // the condition into the predecessor and fix that value to true, saving some @@ -1174,6 +1190,26 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) { BlocksToFoldInto.push_back(XorOpValues[i].second); } + // If we inferred a value for all of the predecessors, then duplication won't + // help us. However, we can just replace the LHS or RHS with the constant. + if (BlocksToFoldInto.size() == + cast<PHINode>(BB->front()).getNumIncomingValues()) { + if (SplitVal == 0) { + // If all preds provide undef, just nuke the xor, because it is undef too. + BO->replaceAllUsesWith(UndefValue::get(BO->getType())); + BO->eraseFromParent(); + } else if (SplitVal->isZero()) { + // If all preds provide 0, replace the xor with the other input. + BO->replaceAllUsesWith(BO->getOperand(isLHS)); + BO->eraseFromParent(); + } else { + // If all preds provide 1, set the computed value to 1. + BO->setOperand(!isLHS, SplitVal); + } + + return true; + } + // Try to duplicate BB into PredBB. 
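+  // Each block in BlocksToFoldInto gets its own copy of BB, in which the
+  // phi'd xor operand folds to that predecessor's known constant, so the
+  // conditional branch can be threaded in the copy.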
return DuplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto);
}

@@ -1393,9 +1429,9 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,

// Unless PredBB ends with an unconditional branch, split the edge so that we
// can just clone the bits from BB into the end of the new PredBB.
- BranchInst *OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
+ BranchInst *OldPredBranch = dyn_cast<BranchInst>(PredBB->getTerminator());

- if (!OldPredBranch->isUnconditional()) {
+ if (OldPredBranch == 0 || !OldPredBranch->isUnconditional()) {
PredBB = SplitEdge(PredBB, BB, this);
OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
}
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index fa820ed..240b298 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -17,6 +17,40 @@
// available on the target, and it performs a variety of other optimizations
// related to loop induction variables.
//
+// Terminology note: this code has a lot of handling for "post-increment" or
+// "post-inc" users. This is not talking about post-increment addressing modes;
+// it is instead talking about code like this:
+//
+// %i = phi [ 0, %entry ], [ %i.next, %latch ]
+// ...
+// %i.next = add %i, 1
+// %c = icmp eq %i.next, %n
+//
+// The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>, however
+// it's useful to think about these as the same register, with some uses using
+// the value of the register before the add and some using it after. In this
+// example, the icmp is a post-increment user, since it uses %i.next, which is
+// the value of the induction variable after the increment. The other common
+// case of post-increment users is users outside the loop.
+//
+// TODO: More sophistication in the way Formulae are generated and filtered.
+//
+// TODO: Handle multiple loops at a time.
+//
+// TODO: Should TargetLowering::AddrMode::BaseGV be changed to a ConstantExpr
+// instead of a GlobalValue?
+//
+// TODO: When truncation is free, truncate ICmp users' operands to make it a
+// smaller encoding (on x86 at least).
+//
+// TODO: When a negated register is used by an add (such as in a list of
+// multiple base registers, or as the increment expression in an addrec),
+// we may not actually need both reg and (-1 * reg) in registers; the
+// negation can be implemented by using a sub instead of an add. The
+// lack of support for taking this into consideration when making
+// register pressure decisions is partly worked around by the "Special"
+// use kind.
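+// (A post-increment user outside the loop, such as a use of %i.next in the
+// exit block, likewise sees the value after the final increment.)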
+// //===----------------------------------------------------------------------===// #define DEBUG_TYPE "loop-reduce" @@ -26,208 +60,401 @@ #include "llvm/IntrinsicInst.h" #include "llvm/DerivedTypes.h" #include "llvm/Analysis/IVUsers.h" +#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" -#include "llvm/Transforms/Utils/AddrModeMatcher.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/SmallBitVector.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLowering.h" #include <algorithm> using namespace llvm; -STATISTIC(NumReduced , "Number of IV uses strength reduced"); -STATISTIC(NumInserted, "Number of PHIs inserted"); -STATISTIC(NumVariable, "Number of PHIs with variable strides"); -STATISTIC(NumEliminated, "Number of strides eliminated"); -STATISTIC(NumShadow, "Number of Shadow IVs optimized"); -STATISTIC(NumImmSunk, "Number of common expr immediates sunk into uses"); -STATISTIC(NumLoopCond, "Number of loop terminating conds optimized"); -STATISTIC(NumCountZero, "Number of count iv optimized to count toward zero"); +namespace { + +/// RegSortData - This class holds data which is used to order reuse candidates. +class RegSortData { +public: + /// UsedByIndices - This represents the set of LSRUse indices which reference + /// a particular register. + SmallBitVector UsedByIndices; + + RegSortData() {} + + void print(raw_ostream &OS) const; + void dump() const; +}; -static cl::opt<bool> EnableFullLSRMode("enable-full-lsr", - cl::init(false), - cl::Hidden); +} + +void RegSortData::print(raw_ostream &OS) const { + OS << "[NumUses=" << UsedByIndices.count() << ']'; +} + +void RegSortData::dump() const { + print(errs()); errs() << '\n'; +} namespace { - struct BasedUser; +/// RegUseTracker - Map register candidates to information about how they are +/// used. +class RegUseTracker { + typedef DenseMap<const SCEV *, RegSortData> RegUsesTy; - /// IVInfo - This structure keeps track of one IV expression inserted during - /// StrengthReduceStridedIVUsers. It contains the stride, the common base, as - /// well as the PHI node and increment value created for rewrite. - struct IVExpr { - const SCEV *Stride; - const SCEV *Base; - PHINode *PHI; + RegUsesTy RegUses; + SmallVector<const SCEV *, 16> RegSequence; - IVExpr(const SCEV *const stride, const SCEV *const base, PHINode *phi) - : Stride(stride), Base(base), PHI(phi) {} - }; +public: + void CountRegister(const SCEV *Reg, size_t LUIdx); + + bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const; + + const SmallBitVector &getUsedByIndices(const SCEV *Reg) const; + + void clear(); + + typedef SmallVectorImpl<const SCEV *>::iterator iterator; + typedef SmallVectorImpl<const SCEV *>::const_iterator const_iterator; + iterator begin() { return RegSequence.begin(); } + iterator end() { return RegSequence.end(); } + const_iterator begin() const { return RegSequence.begin(); } + const_iterator end() const { return RegSequence.end(); } +}; + +} - /// IVsOfOneStride - This structure keeps track of all IV expression inserted - /// during StrengthReduceStridedIVUsers for a particular stride of the IV. 
- struct IVsOfOneStride { - std::vector<IVExpr> IVs; +void +RegUseTracker::CountRegister(const SCEV *Reg, size_t LUIdx) { + std::pair<RegUsesTy::iterator, bool> Pair = + RegUses.insert(std::make_pair(Reg, RegSortData())); + RegSortData &RSD = Pair.first->second; + if (Pair.second) + RegSequence.push_back(Reg); + RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1)); + RSD.UsedByIndices.set(LUIdx); +} + +bool +RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const { + if (!RegUses.count(Reg)) return false; + const SmallBitVector &UsedByIndices = + RegUses.find(Reg)->second.UsedByIndices; + int i = UsedByIndices.find_first(); + if (i == -1) return false; + if ((size_t)i != LUIdx) return true; + return UsedByIndices.find_next(i) != -1; +} + +const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV *Reg) const { + RegUsesTy::const_iterator I = RegUses.find(Reg); + assert(I != RegUses.end() && "Unknown register!"); + return I->second.UsedByIndices; +} + +void RegUseTracker::clear() { + RegUses.clear(); + RegSequence.clear(); +} + +namespace { + +/// Formula - This class holds information that describes a formula for +/// computing satisfying a use. It may include broken-out immediates and scaled +/// registers. +struct Formula { + /// AM - This is used to represent complex addressing, as well as other kinds + /// of interesting uses. + TargetLowering::AddrMode AM; + + /// BaseRegs - The list of "base" registers for this use. When this is + /// non-empty, AM.HasBaseReg should be set to true. + SmallVector<const SCEV *, 2> BaseRegs; - void addIV(const SCEV *const Stride, const SCEV *const Base, PHINode *PHI) { - IVs.push_back(IVExpr(Stride, Base, PHI)); + /// ScaledReg - The 'scaled' register for this use. This should be non-null + /// when AM.Scale is not zero. + const SCEV *ScaledReg; + + Formula() : ScaledReg(0) {} + + void InitialMatch(const SCEV *S, Loop *L, + ScalarEvolution &SE, DominatorTree &DT); + + unsigned getNumRegs() const; + const Type *getType() const; + + bool referencesReg(const SCEV *S) const; + bool hasRegsUsedByUsesOtherThan(size_t LUIdx, + const RegUseTracker &RegUses) const; + + void print(raw_ostream &OS) const; + void dump() const; +}; + +} + +/// DoInitialMatch - Recurrsion helper for InitialMatch. +static void DoInitialMatch(const SCEV *S, Loop *L, + SmallVectorImpl<const SCEV *> &Good, + SmallVectorImpl<const SCEV *> &Bad, + ScalarEvolution &SE, DominatorTree &DT) { + // Collect expressions which properly dominate the loop header. + if (S->properlyDominates(L->getHeader(), &DT)) { + Good.push_back(S); + return; + } + + // Look at add operands. + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); + I != E; ++I) + DoInitialMatch(*I, L, Good, Bad, SE, DT); + return; + } + + // Look at addrec operands. + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) + if (!AR->getStart()->isZero()) { + DoInitialMatch(AR->getStart(), L, Good, Bad, SE, DT); + DoInitialMatch(SE.getAddRecExpr(SE.getIntegerSCEV(0, AR->getType()), + AR->getStepRecurrence(SE), + AR->getLoop()), + L, Good, Bad, SE, DT); + return; } - }; - class LoopStrengthReduce : public LoopPass { - IVUsers *IU; - ScalarEvolution *SE; - bool Changed; - - /// IVsByStride - Keep track of all IVs that have been inserted for a - /// particular stride. 
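As a quick sketch of how the new RegUseTracker above is meant to be driven
(the register value and use indices here are hypothetical):

    RegUseTracker RegUses;
    const SCEV *Reg = ...;          // some register-candidate SCEV
    RegUses.CountRegister(Reg, 0);  // LSRUse #0 references Reg
    RegUses.CountRegister(Reg, 2);  // LSRUse #2 references Reg too
    // True: the bit for use #2 is also set in Reg's UsedByIndices.
    bool Shared = RegUses.isRegUsedByUsesOtherThan(Reg, 0);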
- std::map<const SCEV *, IVsOfOneStride> IVsByStride; - - /// DeadInsts - Keep track of instructions we may have made dead, so that - /// we can remove them after we are done working. - SmallVector<WeakVH, 16> DeadInsts; - - /// TLI - Keep a pointer of a TargetLowering to consult for determining - /// transformation profitability. - const TargetLowering *TLI; - - public: - static char ID; // Pass ID, replacement for typeid - explicit LoopStrengthReduce(const TargetLowering *tli = NULL) : - LoopPass(&ID), TLI(tli) {} - - bool runOnLoop(Loop *L, LPPassManager &LPM); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - // We split critical edges, so we change the CFG. However, we do update - // many analyses if they are around. - AU.addPreservedID(LoopSimplifyID); - AU.addPreserved("loops"); - AU.addPreserved("domfrontier"); - AU.addPreserved("domtree"); - - AU.addRequiredID(LoopSimplifyID); - AU.addRequired<ScalarEvolution>(); - AU.addPreserved<ScalarEvolution>(); - AU.addRequired<IVUsers>(); - AU.addPreserved<IVUsers>(); + // Handle a multiplication by -1 (negation) if it didn't fold. + if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) + if (Mul->getOperand(0)->isAllOnesValue()) { + SmallVector<const SCEV *, 4> Ops(Mul->op_begin()+1, Mul->op_end()); + const SCEV *NewMul = SE.getMulExpr(Ops); + + SmallVector<const SCEV *, 4> MyGood; + SmallVector<const SCEV *, 4> MyBad; + DoInitialMatch(NewMul, L, MyGood, MyBad, SE, DT); + const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue( + SE.getEffectiveSCEVType(NewMul->getType()))); + for (SmallVectorImpl<const SCEV *>::const_iterator I = MyGood.begin(), + E = MyGood.end(); I != E; ++I) + Good.push_back(SE.getMulExpr(NegOne, *I)); + for (SmallVectorImpl<const SCEV *>::const_iterator I = MyBad.begin(), + E = MyBad.end(); I != E; ++I) + Bad.push_back(SE.getMulExpr(NegOne, *I)); + return; } - private: - void OptimizeIndvars(Loop *L); - - /// OptimizeLoopTermCond - Change loop terminating condition to use the - /// postinc iv when possible. - void OptimizeLoopTermCond(Loop *L); - - /// OptimizeShadowIV - If IV is used in a int-to-float cast - /// inside the loop then try to eliminate the cast opeation. - void OptimizeShadowIV(Loop *L); - - /// OptimizeMax - Rewrite the loop's terminating condition - /// if it uses a max computation. - ICmpInst *OptimizeMax(Loop *L, ICmpInst *Cond, - IVStrideUse* &CondUse); - - /// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for - /// deciding when to exit the loop is used only for that purpose, try to - /// rearrange things so it counts down to a test against zero. - bool OptimizeLoopCountIV(Loop *L); - bool OptimizeLoopCountIVOfStride(const SCEV* &Stride, - IVStrideUse* &CondUse, Loop *L); - - /// StrengthReduceIVUsersOfStride - Strength reduce all of the users of a - /// single stride of IV. All of the users may have different starting - /// values, and this may not be the only stride. 
- void StrengthReduceIVUsersOfStride(const SCEV *Stride, - IVUsersOfOneStride &Uses, - Loop *L); - void StrengthReduceIVUsers(Loop *L); - - ICmpInst *ChangeCompareStride(Loop *L, ICmpInst *Cond, - IVStrideUse* &CondUse, - const SCEV* &CondStride, - bool PostPass = false); - - bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse, - const SCEV* &CondStride); - bool RequiresTypeConversion(const Type *Ty, const Type *NewTy); - const SCEV *CheckForIVReuse(bool, bool, bool, const SCEV *, - IVExpr&, const Type*, - const std::vector<BasedUser>& UsersToProcess); - bool ValidScale(bool, int64_t, - const std::vector<BasedUser>& UsersToProcess); - bool ValidOffset(bool, int64_t, int64_t, - const std::vector<BasedUser>& UsersToProcess); - const SCEV *CollectIVUsers(const SCEV *Stride, - IVUsersOfOneStride &Uses, - Loop *L, - bool &AllUsesAreAddresses, - bool &AllUsesAreOutsideLoop, - std::vector<BasedUser> &UsersToProcess); - bool StrideMightBeShared(const SCEV *Stride, Loop *L, bool CheckPreInc); - bool ShouldUseFullStrengthReductionMode( - const std::vector<BasedUser> &UsersToProcess, - const Loop *L, - bool AllUsesAreAddresses, - const SCEV *Stride); - void PrepareToStrengthReduceFully( - std::vector<BasedUser> &UsersToProcess, - const SCEV *Stride, - const SCEV *CommonExprs, - const Loop *L, - SCEVExpander &PreheaderRewriter); - void PrepareToStrengthReduceFromSmallerStride( - std::vector<BasedUser> &UsersToProcess, - Value *CommonBaseV, - const IVExpr &ReuseIV, - Instruction *PreInsertPt); - void PrepareToStrengthReduceWithNewPhi( - std::vector<BasedUser> &UsersToProcess, - const SCEV *Stride, - const SCEV *CommonExprs, - Value *CommonBaseV, - Instruction *IVIncInsertPt, - const Loop *L, - SCEVExpander &PreheaderRewriter); - - void DeleteTriviallyDeadInstructions(); - }; + // Ok, we can't do anything interesting. Just stuff the whole thing into a + // register and hope for the best. + Bad.push_back(S); } -char LoopStrengthReduce::ID = 0; -static RegisterPass<LoopStrengthReduce> -X("loop-reduce", "Loop Strength Reduction"); +/// InitialMatch - Incorporate loop-variant parts of S into this Formula, +/// attempting to keep all loop-invariant and loop-computable values in a +/// single base register. +void Formula::InitialMatch(const SCEV *S, Loop *L, + ScalarEvolution &SE, DominatorTree &DT) { + SmallVector<const SCEV *, 4> Good; + SmallVector<const SCEV *, 4> Bad; + DoInitialMatch(S, L, Good, Bad, SE, DT); + if (!Good.empty()) { + BaseRegs.push_back(SE.getAddExpr(Good)); + AM.HasBaseReg = true; + } + if (!Bad.empty()) { + BaseRegs.push_back(SE.getAddExpr(Bad)); + AM.HasBaseReg = true; + } +} -Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) { - return new LoopStrengthReduce(TLI); +/// getNumRegs - Return the total number of register operands used by this +/// formula. This does not include register uses implied by non-constant +/// addrec strides. +unsigned Formula::getNumRegs() const { + return !!ScaledReg + BaseRegs.size(); } -/// DeleteTriviallyDeadInstructions - If any of the instructions is the -/// specified set are trivially dead, delete them and see if this makes any of -/// their operands subsequently dead. -void LoopStrengthReduce::DeleteTriviallyDeadInstructions() { - while (!DeadInsts.empty()) { - Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()); +/// getType - Return the type of this formula, if it has one, or null +/// otherwise. This type is meaningless except for the bit size. 
+const Type *Formula::getType() const { + return !BaseRegs.empty() ? BaseRegs.front()->getType() : + ScaledReg ? ScaledReg->getType() : + AM.BaseGV ? AM.BaseGV->getType() : + 0; +} - if (I == 0 || !isInstructionTriviallyDead(I)) - continue; +/// referencesReg - Test if this formula references the given register. +bool Formula::referencesReg(const SCEV *S) const { + return S == ScaledReg || + std::find(BaseRegs.begin(), BaseRegs.end(), S) != BaseRegs.end(); +} - for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) - if (Instruction *U = dyn_cast<Instruction>(*OI)) { - *OI = 0; - if (U->use_empty()) - DeadInsts.push_back(U); +/// hasRegsUsedByUsesOtherThan - Test whether this formula uses registers +/// which are used by uses other than the use with the given index. +bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx, + const RegUseTracker &RegUses) const { + if (ScaledReg) + if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx)) + return true; + for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(), + E = BaseRegs.end(); I != E; ++I) + if (RegUses.isRegUsedByUsesOtherThan(*I, LUIdx)) + return true; + return false; +} + +void Formula::print(raw_ostream &OS) const { + bool First = true; + if (AM.BaseGV) { + if (!First) OS << " + "; else First = false; + WriteAsOperand(OS, AM.BaseGV, /*PrintType=*/false); + } + if (AM.BaseOffs != 0) { + if (!First) OS << " + "; else First = false; + OS << AM.BaseOffs; + } + for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(), + E = BaseRegs.end(); I != E; ++I) { + if (!First) OS << " + "; else First = false; + OS << "reg(" << **I << ')'; + } + if (AM.Scale != 0) { + if (!First) OS << " + "; else First = false; + OS << AM.Scale << "*reg("; + if (ScaledReg) + OS << *ScaledReg; + else + OS << "<unknown>"; + OS << ')'; + } +} + +void Formula::dump() const { + print(errs()); errs() << '\n'; +} + +/// getSDiv - Return an expression for LHS /s RHS, if it can be determined, +/// or null otherwise. If IgnoreSignificantBits is true, expressions like +/// (X * Y) /s Y are simplified to Y, ignoring that the multiplication may +/// overflow, which is useful when the result will be used in a context where +/// the most significant bits are ignored. +static const SCEV *getSDiv(const SCEV *LHS, const SCEV *RHS, + ScalarEvolution &SE, + bool IgnoreSignificantBits = false) { + // Handle the trivial case, which works for any SCEV type. + if (LHS == RHS) + return SE.getIntegerSCEV(1, LHS->getType()); + + // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do some + // folding. + if (RHS->isAllOnesValue()) + return SE.getMulExpr(LHS, RHS); + + // Check for a division of a constant by a constant. + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) { + const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS); + if (!RC) + return 0; + if (C->getValue()->getValue().srem(RC->getValue()->getValue()) != 0) + return 0; + return SE.getConstant(C->getValue()->getValue() + .sdiv(RC->getValue()->getValue())); + } + + // Distribute the sdiv over addrec operands. + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) { + const SCEV *Start = getSDiv(AR->getStart(), RHS, SE, + IgnoreSignificantBits); + if (!Start) return 0; + const SCEV *Step = getSDiv(AR->getStepRecurrence(SE), RHS, SE, + IgnoreSignificantBits); + if (!Step) return 0; + return SE.getAddRecExpr(Start, Step, AR->getLoop()); + } + + // Distribute the sdiv over add operands. 
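+  // For example, distributing over an add of an addrec and a constant:
+  //   ({0,+,4}<%L> + 8) /s 4  ==>  {0,+,1}<%L> + 2
+  // (every operand must divide exactly, or getSDiv gives up and returns 0).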
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(LHS)) { + SmallVector<const SCEV *, 8> Ops; + for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); + I != E; ++I) { + const SCEV *Op = getSDiv(*I, RHS, SE, + IgnoreSignificantBits); + if (!Op) return 0; + Ops.push_back(Op); + } + return SE.getAddExpr(Ops); + } + + // Check for a multiply operand that we can pull RHS out of. + if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS)) + if (IgnoreSignificantBits || Mul->hasNoSignedWrap()) { + SmallVector<const SCEV *, 4> Ops; + bool Found = false; + for (SCEVMulExpr::op_iterator I = Mul->op_begin(), E = Mul->op_end(); + I != E; ++I) { + if (!Found) + if (const SCEV *Q = getSDiv(*I, RHS, SE, IgnoreSignificantBits)) { + Ops.push_back(Q); + Found = true; + continue; + } + Ops.push_back(*I); } + return Found ? SE.getMulExpr(Ops) : 0; + } - I->eraseFromParent(); - Changed = true; + // Otherwise we don't know. + return 0; +} + +/// ExtractImmediate - If S involves the addition of a constant integer value, +/// return that integer value, and mutate S to point to a new SCEV with that +/// value excluded. +static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) { + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) { + if (C->getValue()->getValue().getMinSignedBits() <= 64) { + S = SE.getIntegerSCEV(0, C->getType()); + return C->getValue()->getSExtValue(); + } + } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end()); + int64_t Result = ExtractImmediate(NewOps.front(), SE); + S = SE.getAddExpr(NewOps); + return Result; + } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { + SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end()); + int64_t Result = ExtractImmediate(NewOps.front(), SE); + S = SE.getAddRecExpr(NewOps, AR->getLoop()); + return Result; + } + return 0; +} + +/// ExtractSymbol - If S involves the addition of a GlobalValue address, +/// return that symbol, and mutate S to point to a new SCEV with that +/// value excluded. +static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) { + if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { + if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) { + S = SE.getIntegerSCEV(0, GV->getType()); + return GV; + } + } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end()); + GlobalValue *Result = ExtractSymbol(NewOps.back(), SE); + S = SE.getAddExpr(NewOps); + return Result; + } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { + SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end()); + GlobalValue *Result = ExtractSymbol(NewOps.front(), SE); + S = SE.getAddRecExpr(NewOps, AR->getLoop()); + return Result; } + return 0; } /// isAddressUse - Returns true if the specified instruction is using the @@ -276,1775 +503,832 @@ static const Type *getAccessType(const Instruction *Inst) { break; } } - return AccessTy; -} - -namespace { - /// BasedUser - For a particular base value, keep information about how we've - /// partitioned the expression so far. - struct BasedUser { - /// Base - The Base value for the PHI node that needs to be inserted for - /// this use. As the use is processed, information gets moved from this - /// field to the Imm field (below). BasedUser values are sorted by this - /// field. - const SCEV *Base; - - /// Inst - The instruction using the induction variable. 
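To illustrate the two Extract helpers above (a sketch; the SCEV is written in
ScalarEvolution's print notation, and this assumes SCEV's usual operand
ordering, constants first and unknowns last):

    const SCEV *S = ...;                     // say S is (4 + %gv + %x)
    int64_t Imm = ExtractImmediate(S, SE);   // Imm == 4;  S becomes (%gv + %x)
    GlobalValue *GV = ExtractSymbol(S, SE);  // GV == %gv; S becomes %x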
- Instruction *Inst; - - /// OperandValToReplace - The operand value of Inst to replace with the - /// EmittedBase. - Value *OperandValToReplace; - - /// Imm - The immediate value that should be added to the base immediately - /// before Inst, because it will be folded into the imm field of the - /// instruction. This is also sometimes used for loop-variant values that - /// must be added inside the loop. - const SCEV *Imm; - - /// Phi - The induction variable that performs the striding that - /// should be used for this user. - PHINode *Phi; - - // isUseOfPostIncrementedValue - True if this should use the - // post-incremented version of this IV, not the preincremented version. - // This can only be set in special cases, such as the terminating setcc - // instruction for a loop and uses outside the loop that are dominated by - // the loop. - bool isUseOfPostIncrementedValue; - - BasedUser(IVStrideUse &IVSU, ScalarEvolution *se) - : Base(IVSU.getOffset()), Inst(IVSU.getUser()), - OperandValToReplace(IVSU.getOperandValToReplace()), - Imm(se->getIntegerSCEV(0, Base->getType())), - isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue()) {} - - // Once we rewrite the code to insert the new IVs we want, update the - // operands of Inst to use the new expression 'NewBase', with 'Imm' added - // to it. - void RewriteInstructionToUseNewBase(const SCEV *NewBase, - Instruction *InsertPt, - SCEVExpander &Rewriter, Loop *L, Pass *P, - SmallVectorImpl<WeakVH> &DeadInsts, - ScalarEvolution *SE); - - Value *InsertCodeForBaseAtPosition(const SCEV *NewBase, - const Type *Ty, - SCEVExpander &Rewriter, - Instruction *IP, - ScalarEvolution *SE); - void dump() const; - }; -} - -void BasedUser::dump() const { - dbgs() << " Base=" << *Base; - dbgs() << " Imm=" << *Imm; - dbgs() << " Inst: " << *Inst; -} - -Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *NewBase, - const Type *Ty, - SCEVExpander &Rewriter, - Instruction *IP, - ScalarEvolution *SE) { - Value *Base = Rewriter.expandCodeFor(NewBase, 0, IP); - // Wrap the base in a SCEVUnknown so that ScalarEvolution doesn't try to - // re-analyze it. - const SCEV *NewValSCEV = SE->getUnknown(Base); + // All pointers have the same requirements, so canonicalize them to an + // arbitrary pointer type to minimize variation. + if (const PointerType *PTy = dyn_cast<PointerType>(AccessTy)) + AccessTy = PointerType::get(IntegerType::get(PTy->getContext(), 1), + PTy->getAddressSpace()); - // Always emit the immediate into the same block as the user. - NewValSCEV = SE->getAddExpr(NewValSCEV, Imm); - - return Rewriter.expandCodeFor(NewValSCEV, Ty, IP); + return AccessTy; } +/// DeleteTriviallyDeadInstructions - If any of the instructions is the +/// specified set are trivially dead, delete them and see if this makes any of +/// their operands subsequently dead. +static bool +DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) { + bool Changed = false; -// Once we rewrite the code to insert the new IVs we want, update the -// operands of Inst to use the new expression 'NewBase', with 'Imm' added -// to it. NewBasePt is the last instruction which contributes to the -// value of NewBase in the case that it's a diffferent instruction from -// the PHI that NewBase is computed from, or null otherwise. 
-// -void BasedUser::RewriteInstructionToUseNewBase(const SCEV *NewBase, - Instruction *NewBasePt, - SCEVExpander &Rewriter, Loop *L, Pass *P, - SmallVectorImpl<WeakVH> &DeadInsts, - ScalarEvolution *SE) { - if (!isa<PHINode>(Inst)) { - // By default, insert code at the user instruction. - BasicBlock::iterator InsertPt = Inst; - - // However, if the Operand is itself an instruction, the (potentially - // complex) inserted code may be shared by many users. Because of this, we - // want to emit code for the computation of the operand right before its old - // computation. This is usually safe, because we obviously used to use the - // computation when it was computed in its current block. However, in some - // cases (e.g. use of a post-incremented induction variable) the NewBase - // value will be pinned to live somewhere after the original computation. - // In this case, we have to back off. - // - // If this is a use outside the loop (which means after, since it is based - // on a loop indvar) we use the post-incremented value, so that we don't - // artificially make the preinc value live out the bottom of the loop. - if (!isUseOfPostIncrementedValue && L->contains(Inst)) { - if (NewBasePt && isa<PHINode>(OperandValToReplace)) { - InsertPt = NewBasePt; - ++InsertPt; - } else if (Instruction *OpInst - = dyn_cast<Instruction>(OperandValToReplace)) { - InsertPt = OpInst; - while (isa<PHINode>(InsertPt)) ++InsertPt; - } - } - Value *NewVal = InsertCodeForBaseAtPosition(NewBase, - OperandValToReplace->getType(), - Rewriter, InsertPt, SE); - // Replace the use of the operand Value with the new Phi we just created. - Inst->replaceUsesOfWith(OperandValToReplace, NewVal); - - DEBUG(dbgs() << " Replacing with "); - DEBUG(WriteAsOperand(dbgs(), NewVal, /*PrintType=*/false)); - DEBUG(dbgs() << ", which has value " << *NewBase << " plus IMM " - << *Imm << "\n"); - return; - } + while (!DeadInsts.empty()) { + Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()); - // PHI nodes are more complex. We have to insert one copy of the NewBase+Imm - // expression into each operand block that uses it. Note that PHI nodes can - // have multiple entries for the same predecessor. We use a map to make sure - // that a PHI node only has a single Value* for each predecessor (which also - // prevents us from inserting duplicate code in some blocks). - DenseMap<BasicBlock*, Value*> InsertedCode; - PHINode *PN = cast<PHINode>(Inst); - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - if (PN->getIncomingValue(i) == OperandValToReplace) { - // If the original expression is outside the loop, put the replacement - // code in the same place as the original expression, - // which need not be an immediate predecessor of this PHI. This way we - // need only one copy of it even if it is referenced multiple times in - // the PHI. We don't do this when the original expression is inside the - // loop because multiple copies sometimes do useful sinking of code in - // that case(?). - Instruction *OldLoc = dyn_cast<Instruction>(OperandValToReplace); - BasicBlock *PHIPred = PN->getIncomingBlock(i); - if (L->contains(OldLoc)) { - // If this is a critical edge, split the edge so that we do not insert - // the code on all predecessor/successor paths. We do this unless this - // is the canonical backedge for this loop, as this can make some - // inserted code be in an illegal position. 
- if (e != 1 && PHIPred->getTerminator()->getNumSuccessors() > 1 && - !isa<IndirectBrInst>(PHIPred->getTerminator()) && - (PN->getParent() != L->getHeader() || !L->contains(PHIPred))) { - - // First step, split the critical edge. - BasicBlock *NewBB = SplitCriticalEdge(PHIPred, PN->getParent(), - P, false); - - // Next step: move the basic block. In particular, if the PHI node - // is outside of the loop, and PredTI is in the loop, we want to - // move the block to be immediately before the PHI block, not - // immediately after PredTI. - if (L->contains(PHIPred) && !L->contains(PN)) - NewBB->moveBefore(PN->getParent()); + if (I == 0 || !isInstructionTriviallyDead(I)) + continue; - // Splitting the edge can reduce the number of PHI entries we have. - e = PN->getNumIncomingValues(); - PHIPred = NewBB; - i = PN->getBasicBlockIndex(PHIPred); - } - } - Value *&Code = InsertedCode[PHIPred]; - if (!Code) { - // Insert the code into the end of the predecessor block. - Instruction *InsertPt = (L->contains(OldLoc)) ? - PHIPred->getTerminator() : - OldLoc->getParent()->getTerminator(); - Code = InsertCodeForBaseAtPosition(NewBase, PN->getType(), - Rewriter, InsertPt, SE); - - DEBUG(dbgs() << " Changing PHI use to "); - DEBUG(WriteAsOperand(dbgs(), Code, /*PrintType=*/false)); - DEBUG(dbgs() << ", which has value " << *NewBase << " plus IMM " - << *Imm << "\n"); + for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) + if (Instruction *U = dyn_cast<Instruction>(*OI)) { + *OI = 0; + if (U->use_empty()) + DeadInsts.push_back(U); } - // Replace the use of the operand Value with the new Phi we just created. - PN->setIncomingValue(i, Code); - Rewriter.clear(); - } + I->eraseFromParent(); + Changed = true; } - // PHI node might have become a constant value after SplitCriticalEdge. - DeadInsts.push_back(Inst); + return Changed; } +namespace { -/// fitsInAddressMode - Return true if V can be subsumed within an addressing -/// mode, and does not need to be put in a register first. -static bool fitsInAddressMode(const SCEV *V, const Type *AccessTy, - const TargetLowering *TLI, bool HasBaseReg) { - if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(V)) { - int64_t VC = SC->getValue()->getSExtValue(); - if (TLI) { - TargetLowering::AddrMode AM; - AM.BaseOffs = VC; - AM.HasBaseReg = HasBaseReg; - return TLI->isLegalAddressingMode(AM, AccessTy); - } else { - // Defaults to PPC. PPC allows a sign-extended 16-bit immediate field. - return (VC > -(1 << 16) && VC < (1 << 16)-1); - } - } - - if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V)) - if (GlobalValue *GV = dyn_cast<GlobalValue>(SU->getValue())) { - if (TLI) { - TargetLowering::AddrMode AM; - AM.BaseGV = GV; - AM.HasBaseReg = HasBaseReg; - return TLI->isLegalAddressingMode(AM, AccessTy); - } else { - // Default: assume global addresses are not legal. - } - } +/// Cost - This class is used to measure and compare candidate formulae. +class Cost { + /// TODO: Some of these could be merged. Also, a lexical ordering + /// isn't always optimal. 
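+  /// (operator< below compares these fields in declaration order, so the
+  /// register count dominates, then AddRecCost, and so on down to SetupCost.)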
+ unsigned NumRegs; + unsigned AddRecCost; + unsigned NumIVMuls; + unsigned NumBaseAdds; + unsigned ImmCost; + unsigned SetupCost; + +public: + Cost() + : NumRegs(0), AddRecCost(0), NumIVMuls(0), NumBaseAdds(0), ImmCost(0), + SetupCost(0) {} + + unsigned getNumRegs() const { return NumRegs; } + + bool operator<(const Cost &Other) const; + + void Loose(); + + void RateFormula(const Formula &F, + SmallPtrSet<const SCEV *, 16> &Regs, + const DenseSet<const SCEV *> &VisitedRegs, + const Loop *L, + const SmallVectorImpl<int64_t> &Offsets, + ScalarEvolution &SE, DominatorTree &DT); + + void print(raw_ostream &OS) const; + void dump() const; + +private: + void RateRegister(const SCEV *Reg, + SmallPtrSet<const SCEV *, 16> &Regs, + const Loop *L, + ScalarEvolution &SE, DominatorTree &DT); + void RatePrimaryRegister(const SCEV *Reg, + SmallPtrSet<const SCEV *, 16> &Regs, + const Loop *L, + ScalarEvolution &SE, DominatorTree &DT); +}; - return false; } -/// MoveLoopVariantsToImmediateField - Move any subexpressions from Val that are -/// loop varying to the Imm operand. -static void MoveLoopVariantsToImmediateField(const SCEV *&Val, const SCEV *&Imm, - Loop *L, ScalarEvolution *SE) { - if (Val->isLoopInvariant(L)) return; // Nothing to do. - - if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) { - SmallVector<const SCEV *, 4> NewOps; - NewOps.reserve(SAE->getNumOperands()); +/// RateRegister - Tally up interesting quantities from the given register. +void Cost::RateRegister(const SCEV *Reg, + SmallPtrSet<const SCEV *, 16> &Regs, + const Loop *L, + ScalarEvolution &SE, DominatorTree &DT) { + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) { + if (AR->getLoop() == L) + AddRecCost += 1; /// TODO: This should be a function of the stride. + + // If this is an addrec for a loop that's already been visited by LSR, + // don't second-guess its addrec phi nodes. LSR isn't currently smart + // enough to reason about more than one loop at a time. Consider these + // registers free and leave them alone. + else if (L->contains(AR->getLoop()) || + (!AR->getLoop()->contains(L) && + DT.dominates(L->getHeader(), AR->getLoop()->getHeader()))) { + for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin(); + PHINode *PN = dyn_cast<PHINode>(I); ++I) + if (SE.isSCEVable(PN->getType()) && + (SE.getEffectiveSCEVType(PN->getType()) == + SE.getEffectiveSCEVType(AR->getType())) && + SE.getSCEV(PN) == AR) + return; - for (unsigned i = 0; i != SAE->getNumOperands(); ++i) - if (!SAE->getOperand(i)->isLoopInvariant(L)) { - // If this is a loop-variant expression, it must stay in the immediate - // field of the expression. - Imm = SE->getAddExpr(Imm, SAE->getOperand(i)); - } else { - NewOps.push_back(SAE->getOperand(i)); - } + // If this isn't one of the addrecs that the loop already has, it + // would require a costly new phi and add. TODO: This isn't + // precisely modeled right now. + ++NumBaseAdds; + if (!Regs.count(AR->getStart())) + RateRegister(AR->getStart(), Regs, L, SE, DT); + } - if (NewOps.empty()) - Val = SE->getIntegerSCEV(0, Val->getType()); - else - Val = SE->getAddExpr(NewOps); - } else if (const SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Val)) { - // Try to pull immediates out of the start value of nested addrec's. 
- const SCEV *Start = SARE->getStart(); - MoveLoopVariantsToImmediateField(Start, Imm, L, SE); - - SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end()); - Ops[0] = Start; - Val = SE->getAddRecExpr(Ops, SARE->getLoop()); - } else { - // Otherwise, all of Val is variant, move the whole thing over. - Imm = SE->getAddExpr(Imm, Val); - Val = SE->getIntegerSCEV(0, Val->getType()); + // Add the step value register, if it needs one. + // TODO: The non-affine case isn't precisely modeled here. + if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) + if (!Regs.count(AR->getStart())) + RateRegister(AR->getOperand(1), Regs, L, SE, DT); } + ++NumRegs; + + // Rough heuristic; favor registers which don't require extra setup + // instructions in the preheader. + if (!isa<SCEVUnknown>(Reg) && + !isa<SCEVConstant>(Reg) && + !(isa<SCEVAddRecExpr>(Reg) && + (isa<SCEVUnknown>(cast<SCEVAddRecExpr>(Reg)->getStart()) || + isa<SCEVConstant>(cast<SCEVAddRecExpr>(Reg)->getStart())))) + ++SetupCost; } +/// RatePrimaryRegister - Record this register in the set. If we haven't seen it +/// before, rate it. +void Cost::RatePrimaryRegister(const SCEV *Reg, + SmallPtrSet<const SCEV *, 16> &Regs, + const Loop *L, + ScalarEvolution &SE, DominatorTree &DT) { + if (Regs.insert(Reg)) + RateRegister(Reg, Regs, L, SE, DT); +} -/// MoveImmediateValues - Look at Val, and pull out any additions of constants -/// that can fit into the immediate field of instructions in the target. -/// Accumulate these immediate values into the Imm value. -static void MoveImmediateValues(const TargetLowering *TLI, - const Type *AccessTy, - const SCEV *&Val, const SCEV *&Imm, - bool isAddress, Loop *L, - ScalarEvolution *SE) { - if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) { - SmallVector<const SCEV *, 4> NewOps; - NewOps.reserve(SAE->getNumOperands()); - - for (unsigned i = 0; i != SAE->getNumOperands(); ++i) { - const SCEV *NewOp = SAE->getOperand(i); - MoveImmediateValues(TLI, AccessTy, NewOp, Imm, isAddress, L, SE); - - if (!NewOp->isLoopInvariant(L)) { - // If this is a loop-variant expression, it must stay in the immediate - // field of the expression. - Imm = SE->getAddExpr(Imm, NewOp); - } else { - NewOps.push_back(NewOp); - } - } - - if (NewOps.empty()) - Val = SE->getIntegerSCEV(0, Val->getType()); - else - Val = SE->getAddExpr(NewOps); - return; - } else if (const SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Val)) { - // Try to pull immediates out of the start value of nested addrec's. - const SCEV *Start = SARE->getStart(); - MoveImmediateValues(TLI, AccessTy, Start, Imm, isAddress, L, SE); - - if (Start != SARE->getStart()) { - SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end()); - Ops[0] = Start; - Val = SE->getAddRecExpr(Ops, SARE->getLoop()); +void Cost::RateFormula(const Formula &F, + SmallPtrSet<const SCEV *, 16> &Regs, + const DenseSet<const SCEV *> &VisitedRegs, + const Loop *L, + const SmallVectorImpl<int64_t> &Offsets, + ScalarEvolution &SE, DominatorTree &DT) { + // Tally up the registers. + if (const SCEV *ScaledReg = F.ScaledReg) { + if (VisitedRegs.count(ScaledReg)) { + Loose(); + return; } - return; - } else if (const SCEVMulExpr *SME = dyn_cast<SCEVMulExpr>(Val)) { - // Transform "8 * (4 + v)" -> "32 + 8*V" if "32" fits in the immed field. 
-    if (isAddress &&
-        fitsInAddressMode(SME->getOperand(0), AccessTy, TLI, false) &&
-        SME->getNumOperands() == 2 && SME->isLoopInvariant(L)) {
-
-      const SCEV *SubImm = SE->getIntegerSCEV(0, Val->getType());
-      const SCEV *NewOp = SME->getOperand(1);
-      MoveImmediateValues(TLI, AccessTy, NewOp, SubImm, isAddress, L, SE);
-
-      // If we extracted something out of the subexpressions, see if we can
-      // simplify this!
-      if (NewOp != SME->getOperand(1)) {
-        // Scale SubImm up by "8". If the result is a target constant, we are
-        // good.
-        SubImm = SE->getMulExpr(SubImm, SME->getOperand(0));
-        if (fitsInAddressMode(SubImm, AccessTy, TLI, false)) {
-          // Accumulate the immediate.
-          Imm = SE->getAddExpr(Imm, SubImm);
-
-          // Update what is left of 'Val'.
-          Val = SE->getMulExpr(SME->getOperand(0), NewOp);
-          return;
-        }
-      }
+    RatePrimaryRegister(ScaledReg, Regs, L, SE, DT);
+  }
+  for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
+       E = F.BaseRegs.end(); I != E; ++I) {
+    const SCEV *BaseReg = *I;
+    if (VisitedRegs.count(BaseReg)) {
+      Loose();
+      return;
     }
+    RatePrimaryRegister(BaseReg, Regs, L, SE, DT);
+
+    NumIVMuls += isa<SCEVMulExpr>(BaseReg) &&
+                 BaseReg->hasComputableLoopEvolution(L);
   }
-  // Loop-variant expressions must stay in the immediate field of the
-  // expression.
-  if ((isAddress && fitsInAddressMode(Val, AccessTy, TLI, false)) ||
-      !Val->isLoopInvariant(L)) {
-    Imm = SE->getAddExpr(Imm, Val);
-    Val = SE->getIntegerSCEV(0, Val->getType());
-    return;
+  if (F.BaseRegs.size() > 1)
+    NumBaseAdds += F.BaseRegs.size() - 1;
+
+  // Tally up the non-zero immediates.
+  for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(),
+       E = Offsets.end(); I != E; ++I) {
+    int64_t Offset = (uint64_t)*I + F.AM.BaseOffs;
+    if (F.AM.BaseGV)
+      ImmCost += 64; // Handle symbolic values conservatively.
+                     // TODO: This should probably be the pointer size.
+    else if (Offset != 0)
+      ImmCost += APInt(64, Offset, true).getMinSignedBits();
+  }
+}
-  // Otherwise, no immediates to move.
+/// Loose - Set this cost to a losing value.
+void Cost::Loose() {
+  NumRegs = ~0u;
+  AddRecCost = ~0u;
+  NumIVMuls = ~0u;
+  NumBaseAdds = ~0u;
+  ImmCost = ~0u;
+  SetupCost = ~0u;
 }
-static void MoveImmediateValues(const TargetLowering *TLI,
-                                Instruction *User,
-                                const SCEV *&Val, const SCEV *&Imm,
-                                bool isAddress, Loop *L,
-                                ScalarEvolution *SE) {
-  const Type *AccessTy = getAccessType(User);
-  MoveImmediateValues(TLI, AccessTy, Val, Imm, isAddress, L, SE);
+/// operator< - Choose the lower cost.
+bool Cost::operator<(const Cost &Other) const {
+  if (NumRegs != Other.NumRegs)
+    return NumRegs < Other.NumRegs;
+  if (AddRecCost != Other.AddRecCost)
+    return AddRecCost < Other.AddRecCost;
+  if (NumIVMuls != Other.NumIVMuls)
+    return NumIVMuls < Other.NumIVMuls;
+  if (NumBaseAdds != Other.NumBaseAdds)
+    return NumBaseAdds < Other.NumBaseAdds;
+  if (ImmCost != Other.ImmCost)
+    return ImmCost < Other.ImmCost;
+  if (SetupCost != Other.SetupCost)
+    return SetupCost < Other.SetupCost;
+  return false;
 }
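The Cost comparison above is strictly lexicographic: a formula needing fewer registers wins no matter how the later components compare, AddRecCost only breaks register-count ties, and so on down to SetupCost. A minimal sketch of the same ordering written with std::tie (C++11; the tree was C++03 at this point, which is why the patch hand-writes the chain):

#include <tuple>

// Hypothetical stand-in type, not part of the patch. std::tie builds tuples
// of references that compare element-wise, so the first differing component
// decides the ordering, exactly like Cost::operator< above.
struct ExampleCost {
  unsigned NumRegs, AddRecCost, NumIVMuls, NumBaseAdds, ImmCost, SetupCost;
  bool operator<(const ExampleCost &RHS) const {
    return std::tie(NumRegs, AddRecCost, NumIVMuls, NumBaseAdds, ImmCost,
                    SetupCost) <
           std::tie(RHS.NumRegs, RHS.AddRecCost, RHS.NumIVMuls,
                    RHS.NumBaseAdds, RHS.ImmCost, RHS.SetupCost);
  }
};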
-/// SeparateSubExprs - Decompose Expr into all of the subexpressions that are
-/// added together. This is used to reassociate common addition subexprs
-/// together for maximal sharing when rewriting bases.
-static void SeparateSubExprs(SmallVector<const SCEV *, 16> &SubExprs,
-                             const SCEV *Expr,
-                             ScalarEvolution *SE) {
-  if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(Expr)) {
-    for (unsigned j = 0, e = AE->getNumOperands(); j != e; ++j)
-      SeparateSubExprs(SubExprs, AE->getOperand(j), SE);
-  } else if (const SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Expr)) {
-    const SCEV *Zero = SE->getIntegerSCEV(0, Expr->getType());
-    if (SARE->getOperand(0) == Zero) {
-      SubExprs.push_back(Expr);
-    } else {
-      // Compute the addrec with zero as its base.
-      SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end());
-      Ops[0] = Zero;   // Start with zero base.
-      SubExprs.push_back(SE->getAddRecExpr(Ops, SARE->getLoop()));
+void Cost::print(raw_ostream &OS) const {
+  OS << NumRegs << " reg" << (NumRegs == 1 ? "" : "s");
+  if (AddRecCost != 0)
+    OS << ", with addrec cost " << AddRecCost;
+  if (NumIVMuls != 0)
+    OS << ", plus " << NumIVMuls << " IV mul" << (NumIVMuls == 1 ? "" : "s");
+  if (NumBaseAdds != 0)
+    OS << ", plus " << NumBaseAdds << " base add"
+       << (NumBaseAdds == 1 ? "" : "s");
+  if (ImmCost != 0)
+    OS << ", plus " << ImmCost << " imm cost";
+  if (SetupCost != 0)
+    OS << ", plus " << SetupCost << " setup cost";
+}
+void Cost::dump() const {
+  print(errs()); errs() << '\n';
+}
-      SeparateSubExprs(SubExprs, SARE->getOperand(0), SE);
-    }
-  } else if (!Expr->isZero()) {
-    // Do not add zero.
-    SubExprs.push_back(Expr);
-  }
-}
-
-// This is logically local to the following function, but C++ says we have
-// to make it file scope.
-struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; };
-
-/// RemoveCommonExpressionsFromUseBases - Look through all of the Bases of all
-/// the Uses, removing any common subexpressions, except that if all such
-/// subexpressions can be folded into an addressing mode for all uses inside
-/// the loop (this case is referred to as "free" in comments herein) we do
-/// not remove anything. This looks for things like (a+b+c) and
-/// (a+c+d) and computes the common (a+c) subexpression. The common expression
-/// is *removed* from the Bases and returned.
-static const SCEV *
-RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
-                                    ScalarEvolution *SE, Loop *L,
-                                    const TargetLowering *TLI) {
-  unsigned NumUses = Uses.size();
-
-  // Only one use? This is a very common case, so we handle it specially and
-  // cheaply.
-  const SCEV *Zero = SE->getIntegerSCEV(0, Uses[0].Base->getType());
-  const SCEV *Result = Zero;
-  const SCEV *FreeResult = Zero;
-  if (NumUses == 1) {
-    // If the use is inside the loop, use its base, regardless of what it is:
-    // it is clearly shared across all the IV's. If the use is outside the loop
-    // (which means after it) we don't want to factor anything *into* the loop,
-    // so just use 0 as the base.
-    if (L->contains(Uses[0].Inst))
-      std::swap(Result, Uses[0].Base);
-    return Result;
-  }
+namespace {
-  // To find common subexpressions, count how many of Uses use each expression.
-  // If any subexpressions are used Uses.size() times, they are common.
-  // Also track whether all uses of each expression can be moved into
-  // an addressing mode "for free"; such expressions are left within the loop.
-  // struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; };
-  std::map<const SCEV *, SubExprUseData> SubExpressionUseData;
-
-  // UniqueSubExprs - Keep track of all of the subexpressions we see in the
-  // order we see them.
- SmallVector<const SCEV *, 16> UniqueSubExprs; - - SmallVector<const SCEV *, 16> SubExprs; - unsigned NumUsesInsideLoop = 0; - for (unsigned i = 0; i != NumUses; ++i) { - // If the user is outside the loop, just ignore it for base computation. - // Since the user is outside the loop, it must be *after* the loop (if it - // were before, it could not be based on the loop IV). We don't want users - // after the loop to affect base computation of values *inside* the loop, - // because we can always add their offsets to the result IV after the loop - // is done, ensuring we get good code inside the loop. - if (!L->contains(Uses[i].Inst)) - continue; - NumUsesInsideLoop++; +/// LSRFixup - An operand value in an instruction which is to be replaced +/// with some equivalent, possibly strength-reduced, replacement. +struct LSRFixup { + /// UserInst - The instruction which will be updated. + Instruction *UserInst; - // If the base is zero (which is common), return zero now, there are no - // CSEs we can find. - if (Uses[i].Base == Zero) return Zero; + /// OperandValToReplace - The operand of the instruction which will + /// be replaced. The operand may be used more than once; every instance + /// will be replaced. + Value *OperandValToReplace; - // If this use is as an address we may be able to put CSEs in the addressing - // mode rather than hoisting them. - bool isAddrUse = isAddressUse(Uses[i].Inst, Uses[i].OperandValToReplace); - // We may need the AccessTy below, but only when isAddrUse, so compute it - // only in that case. - const Type *AccessTy = 0; - if (isAddrUse) - AccessTy = getAccessType(Uses[i].Inst); - - // Split the expression into subexprs. - SeparateSubExprs(SubExprs, Uses[i].Base, SE); - // Add one to SubExpressionUseData.Count for each subexpr present, and - // if the subexpr is not a valid immediate within an addressing mode use, - // set SubExpressionUseData.notAllUsesAreFree. We definitely want to - // hoist these out of the loop (if they are common to all uses). - for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) { - if (++SubExpressionUseData[SubExprs[j]].Count == 1) - UniqueSubExprs.push_back(SubExprs[j]); - if (!isAddrUse || !fitsInAddressMode(SubExprs[j], AccessTy, TLI, false)) - SubExpressionUseData[SubExprs[j]].notAllUsesAreFree = true; - } - SubExprs.clear(); - } - - // Now that we know how many times each is used, build Result. Iterate over - // UniqueSubexprs so that we have a stable ordering. - for (unsigned i = 0, e = UniqueSubExprs.size(); i != e; ++i) { - std::map<const SCEV *, SubExprUseData>::iterator I = - SubExpressionUseData.find(UniqueSubExprs[i]); - assert(I != SubExpressionUseData.end() && "Entry not found?"); - if (I->second.Count == NumUsesInsideLoop) { // Found CSE! - if (I->second.notAllUsesAreFree) - Result = SE->getAddExpr(Result, I->first); - else - FreeResult = SE->getAddExpr(FreeResult, I->first); - } else - // Remove non-cse's from SubExpressionUseData. - SubExpressionUseData.erase(I); - } - - if (FreeResult != Zero) { - // We have some subexpressions that can be subsumed into addressing - // modes in every use inside the loop. However, it's possible that - // there are so many of them that the combined FreeResult cannot - // be subsumed, or that the target cannot handle both a FreeResult - // and a Result in the same instruction (for example because it would - // require too many registers). Check this. 
- for (unsigned i=0; i<NumUses; ++i) { - if (!L->contains(Uses[i].Inst)) - continue; - // We know this is an addressing mode use; if there are any uses that - // are not, FreeResult would be Zero. - const Type *AccessTy = getAccessType(Uses[i].Inst); - if (!fitsInAddressMode(FreeResult, AccessTy, TLI, Result!=Zero)) { - // FIXME: could split up FreeResult into pieces here, some hoisted - // and some not. There is no obvious advantage to this. - Result = SE->getAddExpr(Result, FreeResult); - FreeResult = Zero; - break; - } - } - } + /// PostIncLoop - If this user is to use the post-incremented value of an + /// induction variable, this variable is non-null and holds the loop + /// associated with the induction variable. + const Loop *PostIncLoop; - // If we found no CSE's, return now. - if (Result == Zero) return Result; + /// LUIdx - The index of the LSRUse describing the expression which + /// this fixup needs, minus an offset (below). + size_t LUIdx; - // If we still have a FreeResult, remove its subexpressions from - // SubExpressionUseData. This means they will remain in the use Bases. - if (FreeResult != Zero) { - SeparateSubExprs(SubExprs, FreeResult, SE); - for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) { - std::map<const SCEV *, SubExprUseData>::iterator I = - SubExpressionUseData.find(SubExprs[j]); - SubExpressionUseData.erase(I); - } - SubExprs.clear(); - } + /// Offset - A constant offset to be added to the LSRUse expression. + /// This allows multiple fixups to share the same LSRUse with different + /// offsets, for example in an unrolled loop. + int64_t Offset; - // Otherwise, remove all of the CSE's we found from each of the base values. - for (unsigned i = 0; i != NumUses; ++i) { - // Uses outside the loop don't necessarily include the common base, but - // the final IV value coming into those uses does. Instead of trying to - // remove the pieces of the common base, which might not be there, - // subtract off the base to compensate for this. - if (!L->contains(Uses[i].Inst)) { - Uses[i].Base = SE->getMinusSCEV(Uses[i].Base, Result); - continue; - } + LSRFixup(); - // Split the expression into subexprs. - SeparateSubExprs(SubExprs, Uses[i].Base, SE); + void print(raw_ostream &OS) const; + void dump() const; +}; - // Remove any common subexpressions. - for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) - if (SubExpressionUseData.count(SubExprs[j])) { - SubExprs.erase(SubExprs.begin()+j); - --j; --e; - } +} - // Finally, add the non-shared expressions together. - if (SubExprs.empty()) - Uses[i].Base = Zero; - else - Uses[i].Base = SE->getAddExpr(SubExprs); - SubExprs.clear(); +LSRFixup::LSRFixup() + : UserInst(0), OperandValToReplace(0), PostIncLoop(0), + LUIdx(~size_t(0)), Offset(0) {} + +void LSRFixup::print(raw_ostream &OS) const { + OS << "UserInst="; + // Store is common and interesting enough to be worth special-casing. 
+ if (StoreInst *Store = dyn_cast<StoreInst>(UserInst)) { + OS << "store "; + WriteAsOperand(OS, Store->getOperand(0), /*PrintType=*/false); + } else if (UserInst->getType()->isVoidTy()) + OS << UserInst->getOpcodeName(); + else + WriteAsOperand(OS, UserInst, /*PrintType=*/false); + + OS << ", OperandValToReplace="; + WriteAsOperand(OS, OperandValToReplace, /*PrintType=*/false); + + if (PostIncLoop) { + OS << ", PostIncLoop="; + WriteAsOperand(OS, PostIncLoop->getHeader(), /*PrintType=*/false); } - return Result; -} + if (LUIdx != ~size_t(0)) + OS << ", LUIdx=" << LUIdx; -/// ValidScale - Check whether the given Scale is valid for all loads and -/// stores in UsersToProcess. -/// -bool LoopStrengthReduce::ValidScale(bool HasBaseReg, int64_t Scale, - const std::vector<BasedUser>& UsersToProcess) { - if (!TLI) - return true; + if (Offset != 0) + OS << ", Offset=" << Offset; +} - for (unsigned i = 0, e = UsersToProcess.size(); i!=e; ++i) { - // If this is a load or other access, pass the type of the access in. - const Type *AccessTy = - Type::getVoidTy(UsersToProcess[i].Inst->getContext()); - if (isAddressUse(UsersToProcess[i].Inst, - UsersToProcess[i].OperandValToReplace)) - AccessTy = getAccessType(UsersToProcess[i].Inst); - else if (isa<PHINode>(UsersToProcess[i].Inst)) - continue; +void LSRFixup::dump() const { + print(errs()); errs() << '\n'; +} - TargetLowering::AddrMode AM; - if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(UsersToProcess[i].Imm)) - AM.BaseOffs = SC->getValue()->getSExtValue(); - AM.HasBaseReg = HasBaseReg || !UsersToProcess[i].Base->isZero(); - AM.Scale = Scale; +namespace { - // If load[imm+r*scale] is illegal, bail out. - if (!TLI->isLegalAddressingMode(AM, AccessTy)) - return false; +/// UniquifierDenseMapInfo - A DenseMapInfo implementation for holding +/// DenseMaps and DenseSets of sorted SmallVectors of const SCEV*. +struct UniquifierDenseMapInfo { + static SmallVector<const SCEV *, 2> getEmptyKey() { + SmallVector<const SCEV *, 2> V; + V.push_back(reinterpret_cast<const SCEV *>(-1)); + return V; } - return true; -} - -/// ValidOffset - Check whether the given Offset is valid for all loads and -/// stores in UsersToProcess. -/// -bool LoopStrengthReduce::ValidOffset(bool HasBaseReg, - int64_t Offset, - int64_t Scale, - const std::vector<BasedUser>& UsersToProcess) { - if (!TLI) - return true; - for (unsigned i=0, e = UsersToProcess.size(); i!=e; ++i) { - // If this is a load or other access, pass the type of the access in. 
-    const Type *AccessTy =
-      Type::getVoidTy(UsersToProcess[i].Inst->getContext());
-    if (isAddressUse(UsersToProcess[i].Inst,
-                     UsersToProcess[i].OperandValToReplace))
-      AccessTy = getAccessType(UsersToProcess[i].Inst);
-    else if (isa<PHINode>(UsersToProcess[i].Inst))
-      continue;
+  static SmallVector<const SCEV *, 2> getTombstoneKey() {
+    SmallVector<const SCEV *, 2> V;
+    V.push_back(reinterpret_cast<const SCEV *>(-2));
+    return V;
+  }
-    TargetLowering::AddrMode AM;
-    if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(UsersToProcess[i].Imm))
-      AM.BaseOffs = SC->getValue()->getSExtValue();
-    AM.BaseOffs = (uint64_t)AM.BaseOffs + (uint64_t)Offset;
-    AM.HasBaseReg = HasBaseReg || !UsersToProcess[i].Base->isZero();
-    AM.Scale = Scale;
+  static unsigned getHashValue(const SmallVector<const SCEV *, 2> &V) {
+    unsigned Result = 0;
+    for (SmallVectorImpl<const SCEV *>::const_iterator I = V.begin(),
+         E = V.end(); I != E; ++I)
+      Result ^= DenseMapInfo<const SCEV *>::getHashValue(*I);
+    return Result;
+  }
-    // If load[imm+r*scale] is illegal, bail out.
-    if (!TLI->isLegalAddressingMode(AM, AccessTy))
-      return false;
+  static bool isEqual(const SmallVector<const SCEV *, 2> &LHS,
+                      const SmallVector<const SCEV *, 2> &RHS) {
+    return LHS == RHS;
   }
-  return true;
-}
+};
+
+/// LSRUse - This class holds the state that LSR keeps for each use in
+/// IVUsers, as well as uses invented by LSR itself. It includes information
+/// about what kinds of things can be folded into the user, information about
+/// the user itself, and information about how the use may be satisfied.
+/// TODO: Represent multiple users of the same expression in common?
+class LSRUse {
+  DenseSet<SmallVector<const SCEV *, 2>, UniquifierDenseMapInfo> Uniquifier;
+
+public:
+  /// KindType - An enum for a kind of use, indicating what types of
+  /// scaled and immediate operands it might support.
+  enum KindType {
+    Basic,   ///< A normal use, with no folding.
+    Special, ///< A special case of basic, allowing -1 scales.
+    Address, ///< An address use; folding according to TargetLowering
+    ICmpZero ///< An equality icmp with both operands folded into one.
+    // TODO: Add a generic icmp too?
+  };
-/// RequiresTypeConversion - Returns true if converting Ty1 to Ty2 is not
-/// a nop.
-bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty1,
-                                                const Type *Ty2) {
-  if (Ty1 == Ty2)
-    return false;
-  Ty1 = SE->getEffectiveSCEVType(Ty1);
-  Ty2 = SE->getEffectiveSCEVType(Ty2);
-  if (Ty1 == Ty2)
-    return false;
-  if (Ty1->canLosslesslyBitCastTo(Ty2))
-    return false;
-  if (TLI && TLI->isTruncateFree(Ty1, Ty2))
-    return false;
-  return true;
-}
+  KindType Kind;
+  const Type *AccessTy;
-/// CheckForIVReuse - Returns the multiple if the stride is the multiple
-/// of a previous stride and it is a legal value for the target addressing
-/// mode scale component and optional base reg. This allows the users of
-/// this stride to be rewritten as prev iv * factor. It returns 0 if no
-/// reuse is possible. Factors can be negative on some targets, e.g. ARM.
-///
-/// If all uses are outside the loop, we don't require that all multiplies
-/// be folded into the addressing mode, nor even that the factor be constant;
-/// a multiply (executed once) outside the loop is better than another IV
-/// within. Well, usually.
-const SCEV *LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg, - bool AllUsesAreAddresses, - bool AllUsesAreOutsideLoop, - const SCEV *Stride, - IVExpr &IV, const Type *Ty, - const std::vector<BasedUser>& UsersToProcess) { - if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Stride)) { - int64_t SInt = SC->getValue()->getSExtValue(); - for (unsigned NewStride = 0, e = IU->StrideOrder.size(); - NewStride != e; ++NewStride) { - std::map<const SCEV *, IVsOfOneStride>::iterator SI = - IVsByStride.find(IU->StrideOrder[NewStride]); - if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first)) - continue; - // The other stride has no uses, don't reuse it. - std::map<const SCEV *, IVUsersOfOneStride *>::iterator UI = - IU->IVUsesByStride.find(IU->StrideOrder[NewStride]); - if (UI->second->Users.empty()) - continue; - int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue(); - if (SI->first != Stride && - (unsigned(abs64(SInt)) < SSInt || (SInt % SSInt) != 0)) - continue; - int64_t Scale = SInt / SSInt; - // Check that this stride is valid for all the types used for loads and - // stores; if it can be used for some and not others, we might as well use - // the original stride everywhere, since we have to create the IV for it - // anyway. If the scale is 1, then we don't need to worry about folding - // multiplications. - if (Scale == 1 || - (AllUsesAreAddresses && - ValidScale(HasBaseReg, Scale, UsersToProcess))) { - // Prefer to reuse an IV with a base of zero. - for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(), - IE = SI->second.IVs.end(); II != IE; ++II) - // Only reuse previous IV if it would not require a type conversion - // and if the base difference can be folded. - if (II->Base->isZero() && - !RequiresTypeConversion(II->Base->getType(), Ty)) { - IV = *II; - return SE->getIntegerSCEV(Scale, Stride->getType()); - } - // Otherwise, settle for an IV with a foldable base. - if (AllUsesAreAddresses) - for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(), - IE = SI->second.IVs.end(); II != IE; ++II) - // Only reuse previous IV if it would not require a type conversion - // and if the base difference can be folded. - if (SE->getEffectiveSCEVType(II->Base->getType()) == - SE->getEffectiveSCEVType(Ty) && - isa<SCEVConstant>(II->Base)) { - int64_t Base = - cast<SCEVConstant>(II->Base)->getValue()->getSExtValue(); - if (Base > INT32_MIN && Base <= INT32_MAX && - ValidOffset(HasBaseReg, -Base * Scale, - Scale, UsersToProcess)) { - IV = *II; - return SE->getIntegerSCEV(Scale, Stride->getType()); - } - } - } - } - } else if (AllUsesAreOutsideLoop) { - // Accept nonconstant strides here; it is really really right to substitute - // an existing IV if we can. - for (unsigned NewStride = 0, e = IU->StrideOrder.size(); - NewStride != e; ++NewStride) { - std::map<const SCEV *, IVsOfOneStride>::iterator SI = - IVsByStride.find(IU->StrideOrder[NewStride]); - if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first)) - continue; - int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue(); - if (SI->first != Stride && SSInt != 1) - continue; - for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(), - IE = SI->second.IVs.end(); II != IE; ++II) - // Accept nonzero base here. - // Only reuse previous IV if it would not require a type conversion. - if (!RequiresTypeConversion(II->Base->getType(), Ty)) { - IV = *II; - return Stride; - } - } - // Special case, old IV is -1*x and this one is x. Can treat this one as - // -1*old. 
- for (unsigned NewStride = 0, e = IU->StrideOrder.size(); - NewStride != e; ++NewStride) { - std::map<const SCEV *, IVsOfOneStride>::iterator SI = - IVsByStride.find(IU->StrideOrder[NewStride]); - if (SI == IVsByStride.end()) - continue; - if (const SCEVMulExpr *ME = dyn_cast<SCEVMulExpr>(SI->first)) - if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(ME->getOperand(0))) - if (Stride == ME->getOperand(1) && - SC->getValue()->getSExtValue() == -1LL) - for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(), - IE = SI->second.IVs.end(); II != IE; ++II) - // Accept nonzero base here. - // Only reuse previous IV if it would not require type conversion. - if (!RequiresTypeConversion(II->Base->getType(), Ty)) { - IV = *II; - return SE->getIntegerSCEV(-1LL, Stride->getType()); - } - } - } - return SE->getIntegerSCEV(0, Stride->getType()); -} - -/// PartitionByIsUseOfPostIncrementedValue - Simple boolean predicate that -/// returns true if Val's isUseOfPostIncrementedValue is true. -static bool PartitionByIsUseOfPostIncrementedValue(const BasedUser &Val) { - return Val.isUseOfPostIncrementedValue; -} - -/// isNonConstantNegative - Return true if the specified scev is negated, but -/// not a constant. -static bool isNonConstantNegative(const SCEV *Expr) { - const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Expr); - if (!Mul) return false; - - // If there is a constant factor, it will be first. - const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0)); - if (!SC) return false; - - // Return true if the value is negative, this matches things like (-42 * V). - return SC->getValue()->getValue().isNegative(); -} - -/// CollectIVUsers - Transform our list of users and offsets to a bit more -/// complex table. In this new vector, each 'BasedUser' contains 'Base', the -/// base of the strided accesses, as well as the old information from Uses. We -/// progressively move information from the Base field to the Imm field, until -/// we eventually have the full access expression to rewrite the use. -const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *Stride, - IVUsersOfOneStride &Uses, - Loop *L, - bool &AllUsesAreAddresses, - bool &AllUsesAreOutsideLoop, - std::vector<BasedUser> &UsersToProcess) { - // FIXME: Generalize to non-affine IV's. - if (!Stride->isLoopInvariant(L)) - return SE->getIntegerSCEV(0, Stride->getType()); - - UsersToProcess.reserve(Uses.Users.size()); - for (ilist<IVStrideUse>::iterator I = Uses.Users.begin(), - E = Uses.Users.end(); I != E; ++I) { - UsersToProcess.push_back(BasedUser(*I, SE)); - - // Move any loop variant operands from the offset field to the immediate - // field of the use, so that we don't try to use something before it is - // computed. - MoveLoopVariantsToImmediateField(UsersToProcess.back().Base, - UsersToProcess.back().Imm, L, SE); - assert(UsersToProcess.back().Base->isLoopInvariant(L) && - "Base value is not loop invariant!"); - } - - // We now have a whole bunch of uses of like-strided induction variables, but - // they might all have different bases. We want to emit one PHI node for this - // stride which we fold as many common expressions (between the IVs) into as - // possible. Start by identifying the common expressions in the base values - // for the strides (e.g. if we have "A+C+B" and "A+B+D" as our bases, find - // "A+B"), emit it to the preheader, then remove the expression from the - // UsersToProcess base values. 
-  const SCEV *CommonExprs =
-    RemoveCommonExpressionsFromUseBases(UsersToProcess, SE, L, TLI);
-
-  // Next, figure out what we can represent in the immediate fields of
-  // instructions. If we can represent anything there, move it to the imm
-  // fields of the BasedUsers. We do this so that it increases the commonality
-  // of the remaining uses.
-  unsigned NumPHI = 0;
-  bool HasAddress = false;
-  for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) {
-    // If the user is not in the current loop, this means it is using the exit
-    // value of the IV. Do not put anything in the base, make sure it's all in
-    // the immediate field to allow as much factoring as possible.
-    if (!L->contains(UsersToProcess[i].Inst)) {
-      UsersToProcess[i].Imm = SE->getAddExpr(UsersToProcess[i].Imm,
-                                             UsersToProcess[i].Base);
-      UsersToProcess[i].Base =
-        SE->getIntegerSCEV(0, UsersToProcess[i].Base->getType());
-    } else {
-      // Not all uses are outside the loop.
-      AllUsesAreOutsideLoop = false;
-
-      // Addressing modes can be folded into loads and stores. Be careful that
-      // the store is through the expression, not of the expression though.
-      bool isPHI = false;
-      bool isAddress = isAddressUse(UsersToProcess[i].Inst,
-                                    UsersToProcess[i].OperandValToReplace);
-      if (isa<PHINode>(UsersToProcess[i].Inst)) {
-        isPHI = true;
-        ++NumPHI;
-      }
+  SmallVector<int64_t, 8> Offsets;
+  int64_t MinOffset;
+  int64_t MaxOffset;
-      if (isAddress)
-        HasAddress = true;
+  /// AllFixupsOutsideLoop - This records whether all of the fixups using this
+  /// LSRUse are outside of the loop, in which case some special-case heuristics
+  /// may be used.
+  bool AllFixupsOutsideLoop;
-      // If this use isn't an address, then not all uses are addresses.
-      if (!isAddress && !isPHI)
-        AllUsesAreAddresses = false;
+  /// Formulae - A list of ways to build a value that can satisfy this user.
+  /// After the list is populated, one of these is selected heuristically and
+  /// used to formulate a replacement for OperandValToReplace in UserInst.
+  SmallVector<Formula, 12> Formulae;
-      MoveImmediateValues(TLI, UsersToProcess[i].Inst, UsersToProcess[i].Base,
-                          UsersToProcess[i].Imm, isAddress, L, SE);
-    }
-  }
+  /// Regs - The set of register candidates used by all formulae in this LSRUse.
+  SmallPtrSet<const SCEV *, 4> Regs;
-  // If one of the uses is a PHI node and all other uses are addresses, still
-  // allow iv reuse. Essentially we are trading one constant multiplication
-  // for one fewer iv.
-  if (NumPHI > 1)
-    AllUsesAreAddresses = false;
+  LSRUse(KindType K, const Type *T) : Kind(K), AccessTy(T),
+                                      MinOffset(INT64_MAX),
+                                      MaxOffset(INT64_MIN),
+                                      AllFixupsOutsideLoop(true) {}
-  // There are no in-loop address uses.
-  if (AllUsesAreAddresses && (!HasAddress && !AllUsesAreOutsideLoop))
-    AllUsesAreAddresses = false;
+  bool InsertFormula(size_t LUIdx, const Formula &F);
-  return CommonExprs;
-}
+  void check() const;
-/// ShouldUseFullStrengthReductionMode - Test whether full strength-reduction
-/// is valid and profitable for the given set of users of a stride. In
-/// full strength-reduction mode, all addresses at the current stride are
-/// strength-reduced all the way down to pointer arithmetic.
-///
-bool LoopStrengthReduce::ShouldUseFullStrengthReductionMode(
-                                const std::vector<BasedUser> &UsersToProcess,
-                                const Loop *L,
-                                bool AllUsesAreAddresses,
-                                const SCEV *Stride) {
-  if (!EnableFullLSRMode)
-    return false;
+  void print(raw_ostream &OS) const;
+  void dump() const;
+};
-  // The heuristics below aim to avoid increasing register pressure, but
-  // fully strength-reducing all the addresses increases the number of
-  // add instructions, so don't do this when optimizing for size.
-  // TODO: If the loop is large, the savings due to simpler addresses
-  // may outweigh the costs of the extra increment instructions.
-  if (L->getHeader()->getParent()->hasFnAttr(Attribute::OptimizeForSize))
-    return false;
+/// InsertFormula - If the given formula has not yet been inserted, add it to
+/// the list, and return true. Return false otherwise.
+bool LSRUse::InsertFormula(size_t LUIdx, const Formula &F) {
+  SmallVector<const SCEV *, 2> Key = F.BaseRegs;
+  if (F.ScaledReg) Key.push_back(F.ScaledReg);
+  // Unstable sort by host order ok, because this is only used for uniquifying.
+  std::sort(Key.begin(), Key.end());
-  // TODO: For now, don't do full strength reduction if there could
-  // potentially be greater-stride multiples of the current stride
-  // which could reuse the current stride IV.
-  if (IU->StrideOrder.back() != Stride)
+  if (!Uniquifier.insert(Key).second)
     return false;
-  // Iterate through the uses to find conditions that automatically rule out
-  // full-lsr mode.
-  for (unsigned i = 0, e = UsersToProcess.size(); i != e; ) {
-    const SCEV *Base = UsersToProcess[i].Base;
-    const SCEV *Imm = UsersToProcess[i].Imm;
-    // If any users have a loop-variant component, they can't be fully
-    // strength-reduced.
-    if (Imm && !Imm->isLoopInvariant(L))
-      return false;
-    // If there are two users with the same base and the difference between
-    // the two Imm values can't be folded into the address, full
-    // strength reduction would increase register pressure.
-    do {
-      const SCEV *CurImm = UsersToProcess[i].Imm;
-      if ((CurImm || Imm) && CurImm != Imm) {
-        if (!CurImm) CurImm = SE->getIntegerSCEV(0, Stride->getType());
-        if (!Imm) Imm = SE->getIntegerSCEV(0, Stride->getType());
-        const Instruction *Inst = UsersToProcess[i].Inst;
-        const Type *AccessTy = getAccessType(Inst);
-        const SCEV *Diff = SE->getMinusSCEV(UsersToProcess[i].Imm, Imm);
-        if (!Diff->isZero() &&
-            (!AllUsesAreAddresses ||
-             !fitsInAddressMode(Diff, AccessTy, TLI, /*HasBaseReg=*/true)))
-          return false;
-      }
-    } while (++i != e && Base == UsersToProcess[i].Base);
-  }
+  // Using a register to hold the value of 0 is not profitable.
+  assert((!F.ScaledReg || !F.ScaledReg->isZero()) &&
+         "Zero allocated in a scaled register!");
+#ifndef NDEBUG
+  for (SmallVectorImpl<const SCEV *>::const_iterator I =
+       F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I)
+    assert(!(*I)->isZero() && "Zero allocated in a base register!");
+#endif
-  // If there's exactly one user in this stride, fully strength-reducing it
-  // won't increase register pressure. If it's starting from a non-zero base,
-  // it'll be simpler this way.
-  if (UsersToProcess.size() == 1 && !UsersToProcess[0].Base->isZero())
-    return true;
+  // Add the formula to the list.
+  Formulae.push_back(F);
-  // Otherwise, if there are any users in this stride that don't require
-  // a register for their base, full strength-reduction will increase
-  // register pressure.
- for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) - if (UsersToProcess[i].Base->isZero()) - return false; + // Record registers now being used by this use. + if (F.ScaledReg) Regs.insert(F.ScaledReg); + Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end()); - // Otherwise, go for it. return true; } -/// InsertAffinePhi Create and insert a PHI node for an induction variable -/// with the specified start and step values in the specified loop. -/// -/// If NegateStride is true, the stride should be negated by using a -/// subtract instead of an add. -/// -/// Return the created phi node. -/// -static PHINode *InsertAffinePhi(const SCEV *Start, const SCEV *Step, - Instruction *IVIncInsertPt, - const Loop *L, - SCEVExpander &Rewriter) { - assert(Start->isLoopInvariant(L) && "New PHI start is not loop invariant!"); - assert(Step->isLoopInvariant(L) && "New PHI stride is not loop invariant!"); - - BasicBlock *Header = L->getHeader(); - BasicBlock *Preheader = L->getLoopPreheader(); - BasicBlock *LatchBlock = L->getLoopLatch(); - const Type *Ty = Start->getType(); - Ty = Rewriter.SE.getEffectiveSCEVType(Ty); - - PHINode *PN = PHINode::Create(Ty, "lsr.iv", Header->begin()); - PN->addIncoming(Rewriter.expandCodeFor(Start, Ty, Preheader->getTerminator()), - Preheader); - - // If the stride is negative, insert a sub instead of an add for the - // increment. - bool isNegative = isNonConstantNegative(Step); - const SCEV *IncAmount = Step; - if (isNegative) - IncAmount = Rewriter.SE.getNegativeSCEV(Step); - - // Insert an add instruction right before the terminator corresponding - // to the back-edge or just before the only use. The location is determined - // by the caller and passed in as IVIncInsertPt. - Value *StepV = Rewriter.expandCodeFor(IncAmount, Ty, - Preheader->getTerminator()); - Instruction *IncV; - if (isNegative) { - IncV = BinaryOperator::CreateSub(PN, StepV, "lsr.iv.next", - IVIncInsertPt); - } else { - IncV = BinaryOperator::CreateAdd(PN, StepV, "lsr.iv.next", - IVIncInsertPt); - } - if (!isa<ConstantInt>(StepV)) ++NumVariable; - - PN->addIncoming(IncV, LatchBlock); - - ++NumInserted; - return PN; -} - -static void SortUsersToProcess(std::vector<BasedUser> &UsersToProcess) { - // We want to emit code for users inside the loop first. To do this, we - // rearrange BasedUser so that the entries at the end have - // isUseOfPostIncrementedValue = false, because we pop off the end of the - // vector (so we handle them first). - std::partition(UsersToProcess.begin(), UsersToProcess.end(), - PartitionByIsUseOfPostIncrementedValue); - - // Sort this by base, so that things with the same base are handled - // together. By partitioning first and stable-sorting later, we are - // guaranteed that within each base we will pop off users from within the - // loop before users outside of the loop with a particular base. - // - // We would like to use stable_sort here, but we can't. The problem is that - // const SCEV *'s don't have a deterministic ordering w.r.t to each other, so - // we don't have anything to do a '<' comparison on. Because we think the - // number of uses is small, do a horrible bubble sort which just relies on - // ==. - for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) { - // Get a base value. - const SCEV *Base = UsersToProcess[i].Base; - - // Compact everything with this base to be consecutive with this one. 
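LSRUse::InsertFormula above canonicalizes each formula to a key (its base registers plus the scaled register, sorted by pointer value) and lets a set insertion reject duplicates; the sort order is host-dependent, which is harmless because the key is only ever compared for equality. A sketch of the same uniquifying idiom with standard containers (hypothetical names; std::set stands in for the DenseSet):

#include <algorithm>
#include <set>
#include <vector>

typedef std::vector<const void *> RegKey; // stand-in for the SCEV pointers

// Returns true only the first time an equivalent register set is seen.
bool insertUnique(std::set<RegKey> &Uniquifier, RegKey Key) {
  std::sort(Key.begin(), Key.end()); // canonicalize; any total order works
  return Uniquifier.insert(Key).second;
}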
-    for (unsigned j = i+1; j != e; ++j) {
-      if (UsersToProcess[j].Base == Base) {
-        std::swap(UsersToProcess[i+1], UsersToProcess[j]);
-        ++i;
-      }
-    }
+void LSRUse::print(raw_ostream &OS) const {
+  OS << "LSR Use: Kind=";
+  switch (Kind) {
+  case Basic:    OS << "Basic"; break;
+  case Special:  OS << "Special"; break;
+  case ICmpZero: OS << "ICmpZero"; break;
+  case Address:
+    OS << "Address of ";
+    if (isa<PointerType>(AccessTy))
+      OS << "pointer"; // the full pointer type could be really verbose
+    else
+      OS << *AccessTy;
   }
-}
-
-/// PrepareToStrengthReduceFully - Prepare to fully strength-reduce
-/// UsersToProcess, meaning lowering addresses all the way down to direct
-/// pointer arithmetic.
-///
-void
-LoopStrengthReduce::PrepareToStrengthReduceFully(
-                                std::vector<BasedUser> &UsersToProcess,
-                                const SCEV *Stride,
-                                const SCEV *CommonExprs,
-                                const Loop *L,
-                                SCEVExpander &PreheaderRewriter) {
-  DEBUG(dbgs() << "  Fully reducing all users\n");
-
-  // Rewrite the UsersToProcess records, creating a separate PHI for each
-  // unique Base value.
-  Instruction *IVIncInsertPt = L->getLoopLatch()->getTerminator();
-  for (unsigned i = 0, e = UsersToProcess.size(); i != e; ) {
-    // TODO: The uses are grouped by base, but not sorted. We arbitrarily
-    // pick the first Imm value here to start with, and adjust it for the
-    // other uses.
-    const SCEV *Imm = UsersToProcess[i].Imm;
-    const SCEV *Base = UsersToProcess[i].Base;
-    const SCEV *Start = SE->getAddExpr(CommonExprs, Base, Imm);
-    PHINode *Phi = InsertAffinePhi(Start, Stride, IVIncInsertPt, L,
-                                   PreheaderRewriter);
-    // Loop over all the users with the same base.
-    do {
-      UsersToProcess[i].Base = SE->getIntegerSCEV(0, Stride->getType());
-      UsersToProcess[i].Imm = SE->getMinusSCEV(UsersToProcess[i].Imm, Imm);
-      UsersToProcess[i].Phi = Phi;
-      assert(UsersToProcess[i].Imm->isLoopInvariant(L) &&
-             "ShouldUseFullStrengthReductionMode should reject this!");
-    } while (++i != e && Base == UsersToProcess[i].Base);
-  }
-}
-
-/// FindIVIncInsertPt - Return the location to insert the increment instruction.
-/// If the only use is a use of the postinc value (it must be the loop
-/// termination condition), then insert it just before the use.
-static Instruction *FindIVIncInsertPt(std::vector<BasedUser> &UsersToProcess,
-                                      const Loop *L) {
-  if (UsersToProcess.size() == 1 &&
-      UsersToProcess[0].isUseOfPostIncrementedValue &&
-      L->contains(UsersToProcess[0].Inst))
-    return UsersToProcess[0].Inst;
-  return L->getLoopLatch()->getTerminator();
-}
-
-/// PrepareToStrengthReduceWithNewPhi - Insert a new induction variable for the
-/// given users to share.
-///
-void
-LoopStrengthReduce::PrepareToStrengthReduceWithNewPhi(
-                                std::vector<BasedUser> &UsersToProcess,
-                                const SCEV *Stride,
-                                const SCEV *CommonExprs,
-                                Value *CommonBaseV,
-                                Instruction *IVIncInsertPt,
-                                const Loop *L,
-                                SCEVExpander &PreheaderRewriter) {
-  DEBUG(dbgs() << "  Inserting new PHI:\n");
-
-  PHINode *Phi = InsertAffinePhi(SE->getUnknown(CommonBaseV),
-                                 Stride, IVIncInsertPt, L,
-                                 PreheaderRewriter);
-
-  // Remember this in case a later stride is multiple of this.
-  IVsByStride[Stride].addIV(Stride, CommonExprs, Phi);
-
-  // All the users will share this new IV.
-  for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i)
-    UsersToProcess[i].Phi = Phi;
-
-  DEBUG(dbgs() << "    IV=");
-  DEBUG(WriteAsOperand(dbgs(), Phi, /*PrintType=*/false));
-  DEBUG(dbgs() << "\n");
-}
-
-/// PrepareToStrengthReduceFromSmallerStride - Prepare for the given users to
-/// reuse an induction variable with a stride that is a factor of the current
-/// induction variable.
-///
-void
-LoopStrengthReduce::PrepareToStrengthReduceFromSmallerStride(
-                                std::vector<BasedUser> &UsersToProcess,
-                                Value *CommonBaseV,
-                                const IVExpr &ReuseIV,
-                                Instruction *PreInsertPt) {
-  DEBUG(dbgs() << "  Rewriting in terms of existing IV of STRIDE "
-               << *ReuseIV.Stride << " and BASE " << *ReuseIV.Base << "\n");
-
-  // All the users will share the reused IV.
-  for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i)
-    UsersToProcess[i].Phi = ReuseIV.PHI;
-
-  Constant *C = dyn_cast<Constant>(CommonBaseV);
-  if (C &&
-      (!C->isNullValue() &&
-       !fitsInAddressMode(SE->getUnknown(CommonBaseV), CommonBaseV->getType(),
-                          TLI, false)))
-    // We want the common base emitted into the preheader! This is just
-    // using cast as a copy so BitCast (no-op cast) is appropriate
-    CommonBaseV = new BitCastInst(CommonBaseV, CommonBaseV->getType(),
-                                  "commonbase", PreInsertPt);
-}
-
-static bool IsImmFoldedIntoAddrMode(GlobalValue *GV, int64_t Offset,
-                                    const Type *AccessTy,
-                                    std::vector<BasedUser> &UsersToProcess,
-                                    const TargetLowering *TLI) {
-  SmallVector<Instruction*, 16> AddrModeInsts;
-  for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) {
-    if (UsersToProcess[i].isUseOfPostIncrementedValue)
-      continue;
-    ExtAddrMode AddrMode =
-      AddressingModeMatcher::Match(UsersToProcess[i].OperandValToReplace,
-                                   AccessTy, UsersToProcess[i].Inst,
-                                   AddrModeInsts, *TLI);
-    if (GV && GV != AddrMode.BaseGV)
-      return false;
-    if (Offset && !AddrMode.BaseOffs)
-      // FIXME: How to accurately check whether its immediate offset is folded.
-      return false;
-    AddrModeInsts.clear();
+  OS << ", Offsets={";
+  for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(),
+       E = Offsets.end(); I != E; ++I) {
+    OS << *I;
+    if (next(I) != E)
+      OS << ',';
   }
-  return true;
-}
+  OS << '}';
-/// StrengthReduceIVUsersOfStride - Strength reduce all of the users of a single
-/// stride of IV. All of the users may have different starting values, and this
-/// may not be the only stride.
-void
-LoopStrengthReduce::StrengthReduceIVUsersOfStride(const SCEV *Stride,
-                                                  IVUsersOfOneStride &Uses,
-                                                  Loop *L) {
-  // If all the users are moved to another stride, then there is nothing to do.
-  if (Uses.Users.empty())
-    return;
+  if (AllFixupsOutsideLoop)
+    OS << ", all-fixups-outside-loop";
+}
-  // Keep track if every use in UsersToProcess is an address. If they all are,
-  // we may be able to rewrite the entire collection of them in terms of a
-  // smaller-stride IV.
-  bool AllUsesAreAddresses = true;
-
-  // Keep track if every use of a single stride is outside the loop. If so,
-  // we want to be more aggressive about reusing a smaller-stride IV; a
-  // multiply outside the loop is better than another IV inside. Well, usually.
-  bool AllUsesAreOutsideLoop = true;
-
-  // Transform our list of users and offsets to a bit more complex table. In
-  // this new vector, each 'BasedUser' contains 'Base', the base of the
-  // strided accesses, as well as the old information from Uses. We
-  // progressively move information from the Base field to the Imm field,
-  // until we eventually have the full access expression to rewrite the use.
- std::vector<BasedUser> UsersToProcess; - const SCEV *CommonExprs = CollectIVUsers(Stride, Uses, L, AllUsesAreAddresses, - AllUsesAreOutsideLoop, - UsersToProcess); - - // Sort the UsersToProcess array so that users with common bases are - // next to each other. - SortUsersToProcess(UsersToProcess); - - // If we managed to find some expressions in common, we'll need to carry - // their value in a register and add it in for each use. This will take up - // a register operand, which potentially restricts what stride values are - // valid. - bool HaveCommonExprs = !CommonExprs->isZero(); - const Type *ReplacedTy = CommonExprs->getType(); - - // If all uses are addresses, consider sinking the immediate part of the - // common expression back into uses if they can fit in the immediate fields. - if (TLI && HaveCommonExprs && AllUsesAreAddresses) { - const SCEV *NewCommon = CommonExprs; - const SCEV *Imm = SE->getIntegerSCEV(0, ReplacedTy); - MoveImmediateValues(TLI, Type::getVoidTy( - L->getLoopPreheader()->getContext()), - NewCommon, Imm, true, L, SE); - if (!Imm->isZero()) { - bool DoSink = true; - - // If the immediate part of the common expression is a GV, check if it's - // possible to fold it into the target addressing mode. - GlobalValue *GV = 0; - if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(Imm)) - GV = dyn_cast<GlobalValue>(SU->getValue()); - int64_t Offset = 0; - if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Imm)) - Offset = SC->getValue()->getSExtValue(); - if (GV || Offset) - // Pass VoidTy as the AccessTy to be conservative, because - // there could be multiple access types among all the uses. - DoSink = IsImmFoldedIntoAddrMode(GV, Offset, - Type::getVoidTy(L->getLoopPreheader()->getContext()), - UsersToProcess, TLI); - - if (DoSink) { - DEBUG(dbgs() << " Sinking " << *Imm << " back down into uses\n"); - for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) - UsersToProcess[i].Imm = SE->getAddExpr(UsersToProcess[i].Imm, Imm); - CommonExprs = NewCommon; - HaveCommonExprs = !CommonExprs->isZero(); - ++NumImmSunk; - } - } - } +void LSRUse::dump() const { + print(errs()); errs() << '\n'; +} - // Now that we know what we need to do, insert the PHI node itself. - // - DEBUG(dbgs() << "LSR: Examining IVs of TYPE " << *ReplacedTy << " of STRIDE " - << *Stride << ":\n" - << " Common base: " << *CommonExprs << "\n"); +/// isLegalUse - Test whether the use described by AM is "legal", meaning it can +/// be completely folded into the user instruction at isel time. This includes +/// address-mode folding and special icmp tricks. +static bool isLegalUse(const TargetLowering::AddrMode &AM, + LSRUse::KindType Kind, const Type *AccessTy, + const TargetLowering *TLI) { + switch (Kind) { + case LSRUse::Address: + // If we have low-level target information, ask the target if it can + // completely fold this address. + if (TLI) return TLI->isLegalAddressingMode(AM, AccessTy); + + // Otherwise, just guess that reg+reg addressing is legal. + return !AM.BaseGV && AM.BaseOffs == 0 && AM.Scale <= 1; + + case LSRUse::ICmpZero: + // There's not even a target hook for querying whether it would be legal to + // fold a GV into an ICmp. + if (AM.BaseGV) + return false; - SCEVExpander Rewriter(*SE); - SCEVExpander PreheaderRewriter(*SE); + // ICmp only has two operands; don't allow more than two non-trivial parts. 
+ if (AM.Scale != 0 && AM.HasBaseReg && AM.BaseOffs != 0) + return false; - BasicBlock *Preheader = L->getLoopPreheader(); - Instruction *PreInsertPt = Preheader->getTerminator(); - BasicBlock *LatchBlock = L->getLoopLatch(); - Instruction *IVIncInsertPt = LatchBlock->getTerminator(); - - Value *CommonBaseV = Constant::getNullValue(ReplacedTy); - - const SCEV *RewriteFactor = SE->getIntegerSCEV(0, ReplacedTy); - IVExpr ReuseIV(SE->getIntegerSCEV(0, - Type::getInt32Ty(Preheader->getContext())), - SE->getIntegerSCEV(0, - Type::getInt32Ty(Preheader->getContext())), - 0); - - // Choose a strength-reduction strategy and prepare for it by creating - // the necessary PHIs and adjusting the bookkeeping. - if (ShouldUseFullStrengthReductionMode(UsersToProcess, L, - AllUsesAreAddresses, Stride)) { - PrepareToStrengthReduceFully(UsersToProcess, Stride, CommonExprs, L, - PreheaderRewriter); - } else { - // Emit the initial base value into the loop preheader. - CommonBaseV = PreheaderRewriter.expandCodeFor(CommonExprs, ReplacedTy, - PreInsertPt); - - // If all uses are addresses, check if it is possible to reuse an IV. The - // new IV must have a stride that is a multiple of the old stride; the - // multiple must be a number that can be encoded in the scale field of the - // target addressing mode; and we must have a valid instruction after this - // substitution, including the immediate field, if any. - RewriteFactor = CheckForIVReuse(HaveCommonExprs, AllUsesAreAddresses, - AllUsesAreOutsideLoop, - Stride, ReuseIV, ReplacedTy, - UsersToProcess); - if (!RewriteFactor->isZero()) - PrepareToStrengthReduceFromSmallerStride(UsersToProcess, CommonBaseV, - ReuseIV, PreInsertPt); - else { - IVIncInsertPt = FindIVIncInsertPt(UsersToProcess, L); - PrepareToStrengthReduceWithNewPhi(UsersToProcess, Stride, CommonExprs, - CommonBaseV, IVIncInsertPt, - L, PreheaderRewriter); - } - } + // ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale by + // putting the scaled register in the other operand of the icmp. + if (AM.Scale != 0 && AM.Scale != -1) + return false; - // Process all the users now, replacing their strided uses with - // strength-reduced forms. This outer loop handles all bases, the inner - // loop handles all users of a particular base. - while (!UsersToProcess.empty()) { - const SCEV *Base = UsersToProcess.back().Base; - Instruction *Inst = UsersToProcess.back().Inst; - - // Emit the code for Base into the preheader. - Value *BaseV = 0; - if (!Base->isZero()) { - BaseV = PreheaderRewriter.expandCodeFor(Base, 0, PreInsertPt); - - DEBUG(dbgs() << " INSERTING code for BASE = " << *Base << ":"); - if (BaseV->hasName()) - DEBUG(dbgs() << " Result value name = %" << BaseV->getName()); - DEBUG(dbgs() << "\n"); - - // If BaseV is a non-zero constant, make sure that it gets inserted into - // the preheader, instead of being forward substituted into the uses. We - // do this by forcing a BitCast (noop cast) to be inserted into the - // preheader in this case. - if (!fitsInAddressMode(Base, getAccessType(Inst), TLI, false) && - isa<Constant>(BaseV)) { - // We want this constant emitted into the preheader! This is just - // using cast as a copy so BitCast (no-op cast) is appropriate - BaseV = new BitCastInst(BaseV, BaseV->getType(), "preheaderinsert", - PreInsertPt); - } + // If we have low-level target information, ask the target if it can fold an + // integer immediate on an icmp. 
+ if (AM.BaseOffs != 0) { + if (TLI) return TLI->isLegalICmpImmediate(-AM.BaseOffs); + return false; } - // Emit the code to add the immediate offset to the Phi value, just before - // the instructions that we identified as using this stride and base. - do { - // FIXME: Use emitted users to emit other users. - BasedUser &User = UsersToProcess.back(); - - DEBUG(dbgs() << " Examining "); - if (User.isUseOfPostIncrementedValue) - DEBUG(dbgs() << "postinc"); - else - DEBUG(dbgs() << "preinc"); - DEBUG(dbgs() << " use "); - DEBUG(WriteAsOperand(dbgs(), UsersToProcess.back().OperandValToReplace, - /*PrintType=*/false)); - DEBUG(dbgs() << " in Inst: " << *User.Inst); - - // If this instruction wants to use the post-incremented value, move it - // after the post-inc and use its value instead of the PHI. - Value *RewriteOp = User.Phi; - if (User.isUseOfPostIncrementedValue) { - RewriteOp = User.Phi->getIncomingValueForBlock(LatchBlock); - // If this user is in the loop, make sure it is the last thing in the - // loop to ensure it is dominated by the increment. In case it's the - // only use of the iv, the increment instruction is already before the - // use. - if (L->contains(User.Inst) && User.Inst != IVIncInsertPt) - User.Inst->moveBefore(IVIncInsertPt); - } - - const SCEV *RewriteExpr = SE->getUnknown(RewriteOp); - - if (SE->getEffectiveSCEVType(RewriteOp->getType()) != - SE->getEffectiveSCEVType(ReplacedTy)) { - assert(SE->getTypeSizeInBits(RewriteOp->getType()) > - SE->getTypeSizeInBits(ReplacedTy) && - "Unexpected widening cast!"); - RewriteExpr = SE->getTruncateExpr(RewriteExpr, ReplacedTy); - } + return true; - // If we had to insert new instructions for RewriteOp, we have to - // consider that they may not have been able to end up immediately - // next to RewriteOp, because non-PHI instructions may never precede - // PHI instructions in a block. In this case, remember where the last - // instruction was inserted so that if we're replacing a different - // PHI node, we can use the later point to expand the final - // RewriteExpr. - Instruction *NewBasePt = dyn_cast<Instruction>(RewriteOp); - if (RewriteOp == User.Phi) NewBasePt = 0; - - // Clear the SCEVExpander's expression map so that we are guaranteed - // to have the code emitted where we expect it. - Rewriter.clear(); - - // If we are reusing the iv, then it must be multiplied by a constant - // factor to take advantage of the addressing mode scale component. - if (!RewriteFactor->isZero()) { - // If we're reusing an IV with a nonzero base (currently this happens - // only when all reuses are outside the loop) subtract that base here. - // The base has been used to initialize the PHI node but we don't want - // it here. - if (!ReuseIV.Base->isZero()) { - const SCEV *typedBase = ReuseIV.Base; - if (SE->getEffectiveSCEVType(RewriteExpr->getType()) != - SE->getEffectiveSCEVType(ReuseIV.Base->getType())) { - // It's possible the original IV is a larger type than the new IV, - // in which case we have to truncate the Base. We checked in - // RequiresTypeConversion that this is valid. - assert(SE->getTypeSizeInBits(RewriteExpr->getType()) < - SE->getTypeSizeInBits(ReuseIV.Base->getType()) && - "Unexpected lengthening conversion!"); - typedBase = SE->getTruncateExpr(ReuseIV.Base, - RewriteExpr->getType()); - } - RewriteExpr = SE->getMinusSCEV(RewriteExpr, typedBase); - } + case LSRUse::Basic: + // Only handle single-register values. 
+ return !AM.BaseGV && AM.Scale == 0 && AM.BaseOffs == 0; - // Multiply old variable, with base removed, by new scale factor. - RewriteExpr = SE->getMulExpr(RewriteFactor, - RewriteExpr); - - // The common base is emitted in the loop preheader. But since we - // are reusing an IV, it has not been used to initialize the PHI node. - // Add it to the expression used to rewrite the uses. - // When this use is outside the loop, we earlier subtracted the - // common base, and are adding it back here. Use the same expression - // as before, rather than CommonBaseV, so DAGCombiner will zap it. - if (!CommonExprs->isZero()) { - if (L->contains(User.Inst)) - RewriteExpr = SE->getAddExpr(RewriteExpr, - SE->getUnknown(CommonBaseV)); - else - RewriteExpr = SE->getAddExpr(RewriteExpr, CommonExprs); - } - } - - // Now that we know what we need to do, insert code before User for the - // immediate and any loop-variant expressions. - if (BaseV) - // Add BaseV to the PHI value if needed. - RewriteExpr = SE->getAddExpr(RewriteExpr, SE->getUnknown(BaseV)); - - User.RewriteInstructionToUseNewBase(RewriteExpr, NewBasePt, - Rewriter, L, this, - DeadInsts, SE); - - // Mark old value we replaced as possibly dead, so that it is eliminated - // if we just replaced the last use of that value. - DeadInsts.push_back(User.OperandValToReplace); - - UsersToProcess.pop_back(); - ++NumReduced; - - // If there are any more users to process with the same base, process them - // now. We sorted by base above, so we just have to check the last elt. - } while (!UsersToProcess.empty() && UsersToProcess.back().Base == Base); - // TODO: Next, find out which base index is the most common, pull it out. - } - - // IMPORTANT TODO: Figure out how to partition the IV's with this stride, but - // different starting values, into different PHIs. -} - -void LoopStrengthReduce::StrengthReduceIVUsers(Loop *L) { - // Note: this processes each stride/type pair individually. All users - // passed into StrengthReduceIVUsersOfStride have the same type AND stride. - // Also, note that we iterate over IVUsesByStride indirectly by using - // StrideOrder. This extra layer of indirection makes the ordering of - // strides deterministic - not dependent on map order. - for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e; ++Stride) { - std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = - IU->IVUsesByStride.find(IU->StrideOrder[Stride]); - assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); - // FIXME: Generalize to non-affine IV's. - if (!SI->first->isLoopInvariant(L)) - continue; - StrengthReduceIVUsersOfStride(SI->first, *SI->second, L); + case LSRUse::Special: + // Only handle -1 scales, or no scale. + return AM.Scale == 0 || AM.Scale == -1; } + + return false; } -/// FindIVUserForCond - If Cond has an operand that is an expression of an IV, -/// set the IV user and stride information and return true, otherwise return -/// false. 
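The second isLegalUse overload, below, re-checks the addressing mode at both ends of the use's offset range, guarding each addition with a wrap test. The idiom ((int64_t)((uint64_t)a + b) > a) != (b > 0) flags signed overflow: the sum is formed in unsigned arithmetic (well defined on wrap), and adding a positive offset must strictly grow the value while a non-positive one must not. A standalone restatement of that guard (hypothetical helper name, not part of the patch):

#include <cstdint>

// True when Base + Offset wraps in int64_t arithmetic; mirrors the check
// performed before each AM.BaseOffs adjustment below.
static bool addWouldOverflow(int64_t Base, int64_t Offset) {
  int64_t Sum = (int64_t)((uint64_t)Base + (uint64_t)Offset);
  return (Sum > Base) != (Offset > 0);
}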
-bool LoopStrengthReduce::FindIVUserForCond(ICmpInst *Cond, - IVStrideUse *&CondUse, - const SCEV* &CondStride) { - for (unsigned Stride = 0, e = IU->StrideOrder.size(); - Stride != e && !CondUse; ++Stride) { - std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = - IU->IVUsesByStride.find(IU->StrideOrder[Stride]); - assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); - - for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(), - E = SI->second->Users.end(); UI != E; ++UI) - if (UI->getUser() == Cond) { - // NOTE: we could handle setcc instructions with multiple uses here, but - // InstCombine does it as well for simple uses, it's not clear that it - // occurs enough in real life to handle. - CondUse = UI; - CondStride = SI->first; - return true; - } +static bool isLegalUse(TargetLowering::AddrMode AM, + int64_t MinOffset, int64_t MaxOffset, + LSRUse::KindType Kind, const Type *AccessTy, + const TargetLowering *TLI) { + // Check for overflow. + if (((int64_t)((uint64_t)AM.BaseOffs + MinOffset) > AM.BaseOffs) != + (MinOffset > 0)) + return false; + AM.BaseOffs = (uint64_t)AM.BaseOffs + MinOffset; + if (isLegalUse(AM, Kind, AccessTy, TLI)) { + AM.BaseOffs = (uint64_t)AM.BaseOffs - MinOffset; + // Check for overflow. + if (((int64_t)((uint64_t)AM.BaseOffs + MaxOffset) > AM.BaseOffs) != + (MaxOffset > 0)) + return false; + AM.BaseOffs = (uint64_t)AM.BaseOffs + MaxOffset; + return isLegalUse(AM, Kind, AccessTy, TLI); } return false; } -namespace { - // Constant strides come first which in turns are sorted by their absolute - // values. If absolute values are the same, then positive strides comes first. - // e.g. - // 4, -1, X, 1, 2 ==> 1, -1, 2, 4, X - struct StrideCompare { - const ScalarEvolution *SE; - explicit StrideCompare(const ScalarEvolution *se) : SE(se) {} - - bool operator()(const SCEV *LHS, const SCEV *RHS) { - const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS); - const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS); - if (LHSC && RHSC) { - int64_t LV = LHSC->getValue()->getSExtValue(); - int64_t RV = RHSC->getValue()->getSExtValue(); - uint64_t ALV = (LV < 0) ? -LV : LV; - uint64_t ARV = (RV < 0) ? -RV : RV; - if (ALV == ARV) { - if (LV != RV) - return LV > RV; - } else { - return ALV < ARV; - } +static bool isAlwaysFoldable(int64_t BaseOffs, + GlobalValue *BaseGV, + bool HasBaseReg, + LSRUse::KindType Kind, const Type *AccessTy, + const TargetLowering *TLI, + ScalarEvolution &SE) { + // Fast-path: zero is always foldable. + if (BaseOffs == 0 && !BaseGV) return true; + + // Conservatively, create an address with an immediate and a + // base and a scale. + TargetLowering::AddrMode AM; + AM.BaseOffs = BaseOffs; + AM.BaseGV = BaseGV; + AM.HasBaseReg = HasBaseReg; + AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1; + + return isLegalUse(AM, Kind, AccessTy, TLI); +} - // If it's the same value but different type, sort by bit width so - // that we emit larger induction variables before smaller - // ones, letting the smaller be re-written in terms of larger ones. - return SE->getTypeSizeInBits(RHS->getType()) < - SE->getTypeSizeInBits(LHS->getType()); - } - return LHSC && !RHSC; - } - }; +static bool isAlwaysFoldable(const SCEV *S, + int64_t MinOffset, int64_t MaxOffset, + bool HasBaseReg, + LSRUse::KindType Kind, const Type *AccessTy, + const TargetLowering *TLI, + ScalarEvolution &SE) { + // Fast-path: zero is always foldable. + if (S->isZero()) return true; + + // Conservatively, create an address with an immediate and a + // base and a scale. 
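// A small sketch of the wraparound guard used by the range-checked
// isLegalUse overload above: after adding Offset, the sum must move in
// the direction of Offset's sign, otherwise the signed addition wrapped.
// The unsigned cast keeps the addition itself well-defined; converting
// back is what two's-complement targets (the ones LLVM supports) do.
// BaseOffs and Offset here are hypothetical example values.
#include <cstdint>

inline bool offsetAdditionWraps(int64_t BaseOffs, int64_t Offset) {
  int64_t Sum = (int64_t)((uint64_t)BaseOffs + (uint64_t)Offset);
  return (Sum > BaseOffs) != (Offset > 0);  // mismatch means wraparound
}
// e.g. offsetAdditionWraps(INT64_MAX, 1) is true,
//      offsetAdditionWraps(100, 8) and offsetAdditionWraps(100, 0) are false.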
+  int64_t BaseOffs = ExtractImmediate(S, SE);
+  GlobalValue *BaseGV = ExtractSymbol(S, SE);
+
+  // If there's anything else involved, it's not foldable.
+  if (!S->isZero()) return false;
+
+  // Fast-path: zero is always foldable.
+  if (BaseOffs == 0 && !BaseGV) return true;
+
+  // Conservatively, create an address with an immediate and a
+  // base and a scale.
+  TargetLowering::AddrMode AM;
+  AM.BaseOffs = BaseOffs;
+  AM.BaseGV = BaseGV;
+  AM.HasBaseReg = HasBaseReg;
+  AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
+
+  return isLegalUse(AM, MinOffset, MaxOffset, Kind, AccessTy, TLI);
+}
-/// ChangeCompareStride - If a loop termination compare instruction is the
-/// only use of its stride, and the compaison is against a constant value,
-/// try eliminate the stride by moving the compare instruction to another
-/// stride and change its constant operand accordingly. e.g.
-///
-/// loop:
-/// ...
-/// v1 = v1 + 3
-/// v2 = v2 + 1
-/// if (v2 < 10) goto loop
-/// =>
-/// loop:
-/// ...
-/// v1 = v1 + 3
-/// if (v1 < 30) goto loop
-ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
-                                                  IVStrideUse* &CondUse,
-                                                  const SCEV* &CondStride,
-                                                  bool PostPass) {
-  // If there's only one stride in the loop, there's nothing to do here.
-  if (IU->StrideOrder.size() < 2)
-    return Cond;
-  // If there are other users of the condition's stride, don't bother
-  // trying to change the condition because the stride will still
-  // remain.
-  std::map<const SCEV *, IVUsersOfOneStride *>::iterator I =
-    IU->IVUsesByStride.find(CondStride);
-  if (I == IU->IVUsesByStride.end())
-    return Cond;
-  if (I->second->Users.size() > 1) {
-    for (ilist<IVStrideUse>::iterator II = I->second->Users.begin(),
-           EE = I->second->Users.end(); II != EE; ++II) {
-      if (II->getUser() == Cond)
-        continue;
-      if (!isInstructionTriviallyDead(II->getUser()))
-        return Cond;
-    }
+/// FormulaSorter - This class implements an ordering for formulae which sorts
+/// them by their standalone cost.
+class FormulaSorter {
+  /// These two sets are kept empty, so that we compute standalone costs.
+  DenseSet<const SCEV *> VisitedRegs;
+  SmallPtrSet<const SCEV *, 16> Regs;
+  Loop *L;
+  LSRUse *LU;
+  ScalarEvolution &SE;
+  DominatorTree &DT;
+
+public:
+  FormulaSorter(Loop *l, LSRUse &lu, ScalarEvolution &se, DominatorTree &dt)
+    : L(l), LU(&lu), SE(se), DT(dt) {}
+
+  bool operator()(const Formula &A, const Formula &B) {
+    Cost CostA;
+    CostA.RateFormula(A, Regs, VisitedRegs, L, LU->Offsets, SE, DT);
+    Regs.clear();
+    Cost CostB;
+    CostB.RateFormula(B, Regs, VisitedRegs, L, LU->Offsets, SE, DT);
+    Regs.clear();
+    return CostA < CostB;
+  }
+};
+
+/// LSRInstance - This class holds state for the main loop strength reduction
+/// logic.
+class LSRInstance {
+  IVUsers &IU;
+  ScalarEvolution &SE;
+  DominatorTree &DT;
+  const TargetLowering *const TLI;
+  Loop *const L;
+  bool Changed;
+
+  /// IVIncInsertPos - This is the insert position at which the current loop's
+  /// induction variable increment should be placed. In simple loops, this is
+  /// the latch block's terminator. But in more complicated cases, this is a
+  /// position which will dominate all the in-loop post-increment users.
+  Instruction *IVIncInsertPos;
+
+  /// Factors - Interesting factors between use strides.
+  SmallSetVector<int64_t, 8> Factors;
+
+  /// Types - Interesting use types, to facilitate truncation reuse.
+  SmallSetVector<const Type *, 4> Types;
+
+  /// Fixups - The list of operands which are to be replaced.
+ SmallVector<LSRFixup, 16> Fixups; + + /// Uses - The list of interesting uses. + SmallVector<LSRUse, 16> Uses; + + /// RegUses - Track which uses use which register candidates. + RegUseTracker RegUses; + + void OptimizeShadowIV(); + bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse); + ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse); + bool OptimizeLoopTermCond(); + + void CollectInterestingTypesAndFactors(); + void CollectFixupsAndInitialFormulae(); + + LSRFixup &getNewFixup() { + Fixups.push_back(LSRFixup()); + return Fixups.back(); } - // Only handle constant strides for now. - const SCEVConstant *SC = dyn_cast<SCEVConstant>(CondStride); - if (!SC) return Cond; - - ICmpInst::Predicate Predicate = Cond->getPredicate(); - int64_t CmpSSInt = SC->getValue()->getSExtValue(); - unsigned BitWidth = SE->getTypeSizeInBits(CondStride->getType()); - uint64_t SignBit = 1ULL << (BitWidth-1); - const Type *CmpTy = Cond->getOperand(0)->getType(); - const Type *NewCmpTy = NULL; - unsigned TyBits = SE->getTypeSizeInBits(CmpTy); - unsigned NewTyBits = 0; - const SCEV *NewStride = NULL; - Value *NewCmpLHS = NULL; - Value *NewCmpRHS = NULL; - int64_t Scale = 1; - const SCEV *NewOffset = SE->getIntegerSCEV(0, CmpTy); - - if (ConstantInt *C = dyn_cast<ConstantInt>(Cond->getOperand(1))) { - int64_t CmpVal = C->getValue().getSExtValue(); - - // Check the relevant induction variable for conformance to - // the pattern. - const SCEV *IV = SE->getSCEV(Cond->getOperand(0)); - const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV); - if (!AR || !AR->isAffine()) - return Cond; - - const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart()); - // Check stride constant and the comparision constant signs to detect - // overflow. - if (StartC) { - if ((StartC->getValue()->getSExtValue() < CmpVal && CmpSSInt < 0) || - (StartC->getValue()->getSExtValue() > CmpVal && CmpSSInt > 0)) - return Cond; - } else { - // More restrictive check for the other cases. - if ((CmpVal & SignBit) != (CmpSSInt & SignBit)) - return Cond; - } - - // Look for a suitable stride / iv as replacement. - for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) { - std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = - IU->IVUsesByStride.find(IU->StrideOrder[i]); - if (!isa<SCEVConstant>(SI->first) || SI->second->Users.empty()) - continue; - int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue(); - if (SSInt == CmpSSInt || - abs64(SSInt) < abs64(CmpSSInt) || - (SSInt % CmpSSInt) != 0) - continue; - Scale = SSInt / CmpSSInt; - int64_t NewCmpVal = CmpVal * Scale; + // Support for sharing of LSRUses between LSRFixups. 
+  typedef DenseMap<const SCEV *, size_t> UseMapTy;
+  UseMapTy UseMap;
+
+  bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
+                          LSRUse::KindType Kind, const Type *AccessTy);
+
+  std::pair<size_t, int64_t> getUse(const SCEV *&Expr,
+                                    LSRUse::KindType Kind,
+                                    const Type *AccessTy);
+
+public:
+  void InsertInitialFormula(const SCEV *S, Loop *L, LSRUse &LU, size_t LUIdx);
+  void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
+  void CountRegisters(const Formula &F, size_t LUIdx);
+  bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);
+
+  void CollectLoopInvariantFixupsAndFormulae();
+
+  void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
+                              unsigned Depth = 0);
+  void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
+  void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
+  void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
+  void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
+  void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
+  void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base);
+  void GenerateCrossUseConstantOffsets();
+  void GenerateAllReuseFormulae();
+
+  void FilterOutUndesirableDedicatedRegisters();
+  void NarrowSearchSpaceUsingHeuristics();
+
+  void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
+                    Cost &SolutionCost,
+                    SmallVectorImpl<const Formula *> &Workspace,
+                    const Cost &CurCost,
+                    const SmallPtrSet<const SCEV *, 16> &CurRegs,
+                    DenseSet<const SCEV *> &VisitedRegs) const;
+  void Solve(SmallVectorImpl<const Formula *> &Solution) const;
+
+  Value *Expand(const LSRFixup &LF,
+                const Formula &F,
+                BasicBlock::iterator IP, Loop *L, Instruction *IVIncInsertPos,
+                SCEVExpander &Rewriter,
+                SmallVectorImpl<WeakVH> &DeadInsts,
+                ScalarEvolution &SE, DominatorTree &DT) const;
+  void Rewrite(const LSRFixup &LF,
+               const Formula &F,
+               Loop *L, Instruction *IVIncInsertPos,
+               SCEVExpander &Rewriter,
+               SmallVectorImpl<WeakVH> &DeadInsts,
+               ScalarEvolution &SE, DominatorTree &DT,
+               Pass *P) const;
+  void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
+                         Pass *P);
+
+  LSRInstance(const TargetLowering *tli, Loop *l, Pass *P);
+
+  bool getChanged() const { return Changed; }
+
+  void print_factors_and_types(raw_ostream &OS) const;
+  void print_fixups(raw_ostream &OS) const;
+  void print_uses(raw_ostream &OS) const;
+  void print(raw_ostream &OS) const;
+  void dump() const;
+};
-      // If old icmp value fits in icmp immediate field, but the new one doesn't
-      // try something else.
-      if (TLI &&
-          TLI->isLegalICmpImmediate(CmpVal) &&
-          !TLI->isLegalICmpImmediate(NewCmpVal))
-        continue;
+}
-      APInt Mul = APInt(BitWidth*2, CmpVal, true);
-      Mul = Mul * APInt(BitWidth*2, Scale, true);
-      // Check for overflow.
-      if (!Mul.isSignedIntN(BitWidth))
-        continue;
-      // Check for overflow in the stride's type too.
-      if (!Mul.isSignedIntN(SE->getTypeSizeInBits(SI->first->getType())))
-        continue;
+/// OptimizeShadowIV - If IV is used in an int-to-float cast
+/// inside the loop then try to eliminate the cast operation.
+void LSRInstance::OptimizeShadowIV() {
+  const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
+  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
+    return;
-      // Watch out for overflow.
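// A rough sketch (hypothetical helper, not from the patch) of the
// legality condition behind the mantissa check that OptimizeShadowIV
// performs further below: a floating-point shadow IV is only exact while
// every counter value fits in the FP type's mantissa.
inline bool shadowIVIsExact(unsigned SrcBits, int MantissaBits) {
  if (MantissaBits == -1)               // type has no usable mantissa width
    return false;
  return (int)SrcBits <= MantissaBits;  // every count is representable
}
// e.g. a 32-bit IV shadowed by double (53-bit mantissa) is exact:
// shadowIVIsExact(32, 53); a 64-bit IV is not: shadowIVIsExact(64, 53).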
-      if (ICmpInst::isSigned(Predicate) &&
-          (CmpVal & SignBit) != (NewCmpVal & SignBit))
-        continue;
+  for (IVUsers::const_iterator UI = IU.begin(), E = IU.end();
+       UI != E; /* empty */) {
+    IVUsers::const_iterator CandidateUI = UI;
+    ++UI;
+    Instruction *ShadowUse = CandidateUI->getUser();
+    const Type *DestTy = NULL;
-      // Pick the best iv to use trying to avoid a cast.
-      NewCmpLHS = NULL;
-      for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(),
-             E = SI->second->Users.end(); UI != E; ++UI) {
-        Value *Op = UI->getOperandValToReplace();
-
-        // If the IVStrideUse implies a cast, check for an actual cast which
-        // can be used to find the original IV expression.
-        if (SE->getEffectiveSCEVType(Op->getType()) !=
-            SE->getEffectiveSCEVType(SI->first->getType())) {
-          CastInst *CI = dyn_cast<CastInst>(Op);
-          // If it's not a simple cast, it's complicated.
-          if (!CI)
-            continue;
-          // If it's a cast from a type other than the stride type,
-          // it's complicated.
-          if (CI->getOperand(0)->getType() != SI->first->getType())
-            continue;
-          // Ok, we found the IV expression in the stride's type.
-          Op = CI->getOperand(0);
-        }
+    /* If shadow use is an int->float cast then insert a second IV
+       to eliminate this cast.
-        NewCmpLHS = Op;
-        if (NewCmpLHS->getType() == CmpTy)
-          break;
-      }
-      if (!NewCmpLHS)
-        continue;
+         for (unsigned i = 0; i < n; ++i)
+           foo((double)i);
-      NewCmpTy = NewCmpLHS->getType();
-      NewTyBits = SE->getTypeSizeInBits(NewCmpTy);
-      const Type *NewCmpIntTy = IntegerType::get(Cond->getContext(), NewTyBits);
-      if (RequiresTypeConversion(NewCmpTy, CmpTy)) {
-        // Check if it is possible to rewrite it using
-        // an iv / stride of a smaller integer type.
-        unsigned Bits = NewTyBits;
-        if (ICmpInst::isSigned(Predicate))
-          --Bits;
-        uint64_t Mask = (1ULL << Bits) - 1;
-        if (((uint64_t)NewCmpVal & Mask) != (uint64_t)NewCmpVal)
-          continue;
-      }
+       is transformed into
-      // Don't rewrite if use offset is non-constant and the new type is
-      // of a different type.
-      // FIXME: too conservative?
-      if (NewTyBits != TyBits && !isa<SCEVConstant>(CondUse->getOffset()))
-        continue;
+       double d = 0.0;
+       for (unsigned i = 0; i < n; ++i, ++d)
+         foo(d);
+    */
+    if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser()))
+      DestTy = UCast->getDestTy();
+    else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser()))
+      DestTy = SCast->getDestTy();
+    if (!DestTy) continue;
-      if (!PostPass) {
-        bool AllUsesAreAddresses = true;
-        bool AllUsesAreOutsideLoop = true;
-        std::vector<BasedUser> UsersToProcess;
-        const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
-                                                 AllUsesAreAddresses,
-                                                 AllUsesAreOutsideLoop,
-                                                 UsersToProcess);
-        // Avoid rewriting the compare instruction with an iv of new stride
-        // if it's likely the new stride uses will be rewritten using the
-        // stride of the compare instruction.
-        if (AllUsesAreAddresses &&
-            ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess))
-          continue;
-      }
+    if (TLI) {
+      // If target does not support DestTy natively then do not apply
+      // this transformation.
+      EVT DVT = TLI->getValueType(DestTy);
+      if (!TLI->isTypeLegal(DVT)) continue;
+    }
-      // Avoid rewriting the compare instruction with an iv which has
-      // implicit extension or truncation built into it.
-      // TODO: This is over-conservative.
- if (SE->getTypeSizeInBits(CondUse->getOffset()->getType()) != TyBits) - continue; + PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0)); + if (!PH) continue; + if (PH->getNumIncomingValues() != 2) continue; - // If scale is negative, use swapped predicate unless it's testing - // for equality. - if (Scale < 0 && !Cond->isEquality()) - Predicate = ICmpInst::getSwappedPredicate(Predicate); + const Type *SrcTy = PH->getType(); + int Mantissa = DestTy->getFPMantissaWidth(); + if (Mantissa == -1) continue; + if ((int)SE.getTypeSizeInBits(SrcTy) > Mantissa) + continue; - NewStride = IU->StrideOrder[i]; - if (!isa<PointerType>(NewCmpTy)) - NewCmpRHS = ConstantInt::get(NewCmpTy, NewCmpVal); - else { - Constant *CI = ConstantInt::get(NewCmpIntTy, NewCmpVal); - NewCmpRHS = ConstantExpr::getIntToPtr(CI, NewCmpTy); - } - NewOffset = TyBits == NewTyBits - ? SE->getMulExpr(CondUse->getOffset(), - SE->getConstant(CmpTy, Scale)) - : SE->getConstant(NewCmpIntTy, - cast<SCEVConstant>(CondUse->getOffset())->getValue() - ->getSExtValue()*Scale); - break; + unsigned Entry, Latch; + if (PH->getIncomingBlock(0) == L->getLoopPreheader()) { + Entry = 0; + Latch = 1; + } else { + Entry = 1; + Latch = 0; } - } - // Forgo this transformation if it the increment happens to be - // unfortunately positioned after the condition, and the condition - // has multiple uses which prevent it from being moved immediately - // before the branch. See - // test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-*.ll - // for an example of this situation. - if (!Cond->hasOneUse()) { - for (BasicBlock::iterator I = Cond, E = Cond->getParent()->end(); - I != E; ++I) - if (I == NewCmpLHS) - return Cond; - } + ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry)); + if (!Init) continue; + Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue()); - if (NewCmpRHS) { - // Create a new compare instruction using new stride / iv. - ICmpInst *OldCond = Cond; - // Insert new compare instruction. - Cond = new ICmpInst(OldCond, Predicate, NewCmpLHS, NewCmpRHS, - L->getHeader()->getName() + ".termcond"); + BinaryOperator *Incr = + dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch)); + if (!Incr) continue; + if (Incr->getOpcode() != Instruction::Add + && Incr->getOpcode() != Instruction::Sub) + continue; + + /* Initialize new IV, double d = 0.0 in above example. */ + ConstantInt *C = NULL; + if (Incr->getOperand(0) == PH) + C = dyn_cast<ConstantInt>(Incr->getOperand(1)); + else if (Incr->getOperand(1) == PH) + C = dyn_cast<ConstantInt>(Incr->getOperand(0)); + else + continue; - DEBUG(dbgs() << " Change compare stride in Inst " << *OldCond); - DEBUG(dbgs() << " to " << *Cond << '\n'); + if (!C) continue; - // Remove the old compare instruction. The old indvar is probably dead too. - DeadInsts.push_back(CondUse->getOperandValToReplace()); - OldCond->replaceAllUsesWith(Cond); - OldCond->eraseFromParent(); + // Ignore negative constants, as the code below doesn't handle them + // correctly. TODO: Remove this restriction. + if (!C->getValue().isStrictlyPositive()) continue; - IU->IVUsesByStride[NewStride]->addUser(NewOffset, Cond, NewCmpLHS); - CondUse = &IU->IVUsesByStride[NewStride]->Users.back(); - CondStride = NewStride; - ++NumEliminated; - Changed = true; + /* Add new PHINode. */ + PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH); + + /* create new increment. '++d' in above example. 
*/ + Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue()); + BinaryOperator *NewIncr = + BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ? + Instruction::FAdd : Instruction::FSub, + NewPH, CFP, "IV.S.next.", Incr); + + NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry)); + NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch)); + + /* Remove cast operation */ + ShadowUse->replaceAllUsesWith(NewPH); + ShadowUse->eraseFromParent(); + break; } +} - return Cond; +/// FindIVUserForCond - If Cond has an operand that is an expression of an IV, +/// set the IV user and stride information and return true, otherwise return +/// false. +bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, + IVStrideUse *&CondUse) { + for (IVUsers::iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) + if (UI->getUser() == Cond) { + // NOTE: we could handle setcc instructions with multiple uses here, but + // InstCombine does it as well for simple uses, it's not clear that it + // occurs enough in real life to handle. + CondUse = UI; + return true; + } + return false; } /// OptimizeMax - Rewrite the loop's terminating condition if it uses @@ -2087,7 +1371,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, /// are designed around them. The most obvious example of this is the /// LoopInfo analysis, which doesn't remember trip count values. It /// expects to be able to rediscover the trip count each time it is -/// needed, and it does this using a simple analyis that only succeeds if +/// needed, and it does this using a simple analysis that only succeeds if /// the loop has a canonical induction variable. /// /// However, when it comes time to generate code, the maximum operation @@ -2097,8 +1381,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, /// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting /// the instructions for the maximum computation. /// -ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond, - IVStrideUse* &CondUse) { +ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) { // Check that the loop matches the pattern we're looking for. if (Cond->getPredicate() != CmpInst::ICMP_EQ && Cond->getPredicate() != CmpInst::ICMP_NE) @@ -2107,19 +1390,19 @@ ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond, SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1)); if (!Sel || !Sel->hasOneUse()) return Cond; - const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); + const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L); if (isa<SCEVCouldNotCompute>(BackedgeTakenCount)) return Cond; - const SCEV *One = SE->getIntegerSCEV(1, BackedgeTakenCount->getType()); + const SCEV *One = SE.getIntegerSCEV(1, BackedgeTakenCount->getType()); // Add one to the backedge-taken count to get the trip count. - const SCEV *IterationCount = SE->getAddExpr(BackedgeTakenCount, One); + const SCEV *IterationCount = SE.getAddExpr(BackedgeTakenCount, One); // Check for a max calculation that matches the pattern. if (!isa<SCEVSMaxExpr>(IterationCount) && !isa<SCEVUMaxExpr>(IterationCount)) return Cond; const SCEVNAryExpr *Max = cast<SCEVNAryExpr>(IterationCount); - if (Max != SE->getSCEV(Sel)) return Cond; + if (Max != SE.getSCEV(Sel)) return Cond; // To handle a max with more than two operands, this optimization would // require additional checking and setup. 
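// A source-level picture of the pattern OptimizeMax undoes, using
// hypothetical loops: the guard canonicalization produces a trip count of
// umax(n, 1) (the select), turning the exit test into i != umax(n, 1).
// Both functions below count the same number of iterations for any n,
// which is what lets the comparison be rewritten and the max deleted.
unsigned withMaxGuard(unsigned n) {
  unsigned tc = n > 1 ? n : 1;                 // umax(n, 1), as a select
  unsigned count = 0;
  for (unsigned i = 1; i != tc; ++i) ++count;  // ICMP_NE exit test
  return count;
}
unsigned withoutMaxGuard(unsigned n) {
  unsigned count = 0;
  for (unsigned i = 1; i < n; ++i) ++count;    // ICMP_ULT, no max needed
  return count;
}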
@@ -2129,14 +1412,13 @@ ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond, const SCEV *MaxLHS = Max->getOperand(0); const SCEV *MaxRHS = Max->getOperand(1); if (!MaxLHS || MaxLHS != One) return Cond; - // Check the relevant induction variable for conformance to // the pattern. - const SCEV *IV = SE->getSCEV(Cond->getOperand(0)); + const SCEV *IV = SE.getSCEV(Cond->getOperand(0)); const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV); if (!AR || !AR->isAffine() || AR->getStart() != One || - AR->getStepRecurrence(*SE) != One) + AR->getStepRecurrence(SE) != One) return Cond; assert(AR->getLoop() == L && @@ -2145,9 +1427,9 @@ ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond, // Check the right operand of the select, and remember it, as it will // be used in the new comparison instruction. Value *NewRHS = 0; - if (SE->getSCEV(Sel->getOperand(1)) == MaxRHS) + if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS) NewRHS = Sel->getOperand(1); - else if (SE->getSCEV(Sel->getOperand(2)) == MaxRHS) + else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS) NewRHS = Sel->getOperand(2); if (!NewRHS) return Cond; @@ -2174,552 +1456,1764 @@ ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond, return NewCond; } -/// OptimizeShadowIV - If IV is used in a int-to-float cast -/// inside the loop then try to eliminate the cast opeation. -void LoopStrengthReduce::OptimizeShadowIV(Loop *L) { +/// OptimizeLoopTermCond - Change loop terminating condition to use the +/// postinc iv when possible. +bool +LSRInstance::OptimizeLoopTermCond() { + SmallPtrSet<Instruction *, 4> PostIncs; - const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); - if (isa<SCEVCouldNotCompute>(BackedgeTakenCount)) - return; + BasicBlock *LatchBlock = L->getLoopLatch(); + SmallVector<BasicBlock*, 8> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + + for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { + BasicBlock *ExitingBlock = ExitingBlocks[i]; + + // Get the terminating condition for the loop if possible. If we + // can, we want to change it to use a post-incremented version of its + // induction variable, to allow coalescing the live ranges for the IV into + // one register value. + + BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); + if (!TermBr) + continue; + // FIXME: Overly conservative, termination condition could be an 'or' etc.. + if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition())) + continue; - for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e; - ++Stride) { - std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = - IU->IVUsesByStride.find(IU->StrideOrder[Stride]); - assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); - if (!isa<SCEVConstant>(SI->first)) + // Search IVUsesByStride to find Cond's IVUse if there is one. + IVStrideUse *CondUse = 0; + ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition()); + if (!FindIVUserForCond(Cond, CondUse)) continue; - for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(), - E = SI->second->Users.end(); UI != E; /* empty */) { - ilist<IVStrideUse>::iterator CandidateUI = UI; - ++UI; - Instruction *ShadowUse = CandidateUI->getUser(); - const Type *DestTy = NULL; - - /* If shadow use is a int->float cast then insert a second IV - to eliminate this cast. 
- - for (unsigned i = 0; i < n; ++i) - foo((double)i); - - is transformed into - - double d = 0.0; - for (unsigned i = 0; i < n; ++i, ++d) - foo(d); - */ - if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) - DestTy = UCast->getDestTy(); - else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) - DestTy = SCast->getDestTy(); - if (!DestTy) continue; - - if (TLI) { - // If target does not support DestTy natively then do not apply - // this transformation. - EVT DVT = TLI->getValueType(DestTy); - if (!TLI->isTypeLegal(DVT)) continue; - } + // If the trip count is computed in terms of a max (due to ScalarEvolution + // being unable to find a sufficient guard, for example), change the loop + // comparison to use SLT or ULT instead of NE. + // One consequence of doing this now is that it disrupts the count-down + // optimization. That's not always a bad thing though, because in such + // cases it may still be worthwhile to avoid a max. + Cond = OptimizeMax(Cond, CondUse); + + // If this exiting block dominates the latch block, it may also use + // the post-inc value if it won't be shared with other uses. + // Check for dominance. + if (!DT.dominates(ExitingBlock, LatchBlock)) + continue; - PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0)); - if (!PH) continue; - if (PH->getNumIncomingValues() != 2) continue; + // Conservatively avoid trying to use the post-inc value in non-latch + // exits if there may be pre-inc users in intervening blocks. + if (LatchBlock != ExitingBlock) + for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) + // Test if the use is reachable from the exiting block. This dominator + // query is a conservative approximation of reachability. + if (&*UI != CondUse && + !DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) { + // Conservatively assume there may be reuse if the quotient of their + // strides could be a legal scale. + const SCEV *A = CondUse->getStride(); + const SCEV *B = UI->getStride(); + if (SE.getTypeSizeInBits(A->getType()) != + SE.getTypeSizeInBits(B->getType())) { + if (SE.getTypeSizeInBits(A->getType()) > + SE.getTypeSizeInBits(B->getType())) + B = SE.getSignExtendExpr(B, A->getType()); + else + A = SE.getSignExtendExpr(A, B->getType()); + } + if (const SCEVConstant *D = + dyn_cast_or_null<SCEVConstant>(getSDiv(B, A, SE))) { + // Stride of one or negative one can have reuse with non-addresses. + if (D->getValue()->isOne() || + D->getValue()->isAllOnesValue()) + goto decline_post_inc; + // Avoid weird situations. + if (D->getValue()->getValue().getMinSignedBits() >= 64 || + D->getValue()->getValue().isMinSignedValue()) + goto decline_post_inc; + // Without TLI, assume that any stride might be valid, and so any + // use might be shared. + if (!TLI) + goto decline_post_inc; + // Check for possible scaled-address reuse. 
+ const Type *AccessTy = getAccessType(UI->getUser()); + TargetLowering::AddrMode AM; + AM.Scale = D->getValue()->getSExtValue(); + if (TLI->isLegalAddressingMode(AM, AccessTy)) + goto decline_post_inc; + AM.Scale = -AM.Scale; + if (TLI->isLegalAddressingMode(AM, AccessTy)) + goto decline_post_inc; + } + } - const Type *SrcTy = PH->getType(); - int Mantissa = DestTy->getFPMantissaWidth(); - if (Mantissa == -1) continue; - if ((int)SE->getTypeSizeInBits(SrcTy) > Mantissa) - continue; + DEBUG(dbgs() << " Change loop exiting icmp to use postinc iv: " + << *Cond << '\n'); - unsigned Entry, Latch; - if (PH->getIncomingBlock(0) == L->getLoopPreheader()) { - Entry = 0; - Latch = 1; + // It's possible for the setcc instruction to be anywhere in the loop, and + // possible for it to have multiple users. If it is not immediately before + // the exiting block branch, move it. + if (&*++BasicBlock::iterator(Cond) != TermBr) { + if (Cond->hasOneUse()) { + Cond->moveBefore(TermBr); } else { - Entry = 1; - Latch = 0; + // Clone the terminating condition and insert into the loopend. + ICmpInst *OldCond = Cond; + Cond = cast<ICmpInst>(Cond->clone()); + Cond->setName(L->getHeader()->getName() + ".termcond"); + ExitingBlock->getInstList().insert(TermBr, Cond); + + // Clone the IVUse, as the old use still exists! + CondUse = &IU.AddUser(CondUse->getStride(), CondUse->getOffset(), + Cond, CondUse->getOperandValToReplace()); + TermBr->replaceUsesOfWith(OldCond, Cond); } + } - ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry)); - if (!Init) continue; - Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue()); + // If we get to here, we know that we can transform the setcc instruction to + // use the post-incremented version of the IV, allowing us to coalesce the + // live ranges for the IV correctly. + CondUse->setOffset(SE.getMinusSCEV(CondUse->getOffset(), + CondUse->getStride())); + CondUse->setIsUseOfPostIncrementedValue(true); + Changed = true; - BinaryOperator *Incr = - dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch)); - if (!Incr) continue; - if (Incr->getOpcode() != Instruction::Add - && Incr->getOpcode() != Instruction::Sub) - continue; + PostIncs.insert(Cond); + decline_post_inc:; + } - /* Initialize new IV, double d = 0.0 in above example. */ - ConstantInt *C = NULL; - if (Incr->getOperand(0) == PH) - C = dyn_cast<ConstantInt>(Incr->getOperand(1)); - else if (Incr->getOperand(1) == PH) - C = dyn_cast<ConstantInt>(Incr->getOperand(0)); - else - continue; + // Determine an insertion point for the loop induction variable increment. It + // must dominate all the post-inc comparisons we just set up, and it must + // dominate the loop latch edge. + IVIncInsertPos = L->getLoopLatch()->getTerminator(); + for (SmallPtrSet<Instruction *, 4>::const_iterator I = PostIncs.begin(), + E = PostIncs.end(); I != E; ++I) { + BasicBlock *BB = + DT.findNearestCommonDominator(IVIncInsertPos->getParent(), + (*I)->getParent()); + if (BB == (*I)->getParent()) + IVIncInsertPos = *I; + else if (BB != IVIncInsertPos->getParent()) + IVIncInsertPos = BB->getTerminator(); + } - if (!C) continue; + return Changed; +} + +bool +LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, + LSRUse::KindType Kind, const Type *AccessTy) { + int64_t NewMinOffset = LU.MinOffset; + int64_t NewMaxOffset = LU.MaxOffset; + const Type *NewAccessTy = AccessTy; + + // Check for a mismatched kind. 
It's tempting to collapse mismatched kinds to
+  // something conservative, however this can pessimize in the case that one of
+  // the uses will have all its uses outside the loop, for example.
+  if (LU.Kind != Kind)
+    return false;
+  // Conservatively assume HasBaseReg is true for now.
+  if (NewOffset < LU.MinOffset) {
+    if (!isAlwaysFoldable(LU.MaxOffset - NewOffset, 0, /*HasBaseReg=*/true,
+                          Kind, AccessTy, TLI, SE))
+      return false;
+    NewMinOffset = NewOffset;
+  } else if (NewOffset > LU.MaxOffset) {
+    if (!isAlwaysFoldable(NewOffset - LU.MinOffset, 0, /*HasBaseReg=*/true,
+                          Kind, AccessTy, TLI, SE))
+      return false;
+    NewMaxOffset = NewOffset;
+  }
+  // Check for a mismatched access type, and fall back conservatively as needed.
+  if (Kind == LSRUse::Address && AccessTy != LU.AccessTy)
+    NewAccessTy = Type::getVoidTy(AccessTy->getContext());
+
+  // Update the use.
+  LU.MinOffset = NewMinOffset;
+  LU.MaxOffset = NewMaxOffset;
+  LU.AccessTy = NewAccessTy;
+  if (NewOffset != LU.Offsets.back())
+    LU.Offsets.push_back(NewOffset);
+  return true;
+}
-      // Ignore negative constants, as the code below doesn't handle them
-      // correctly. TODO: Remove this restriction.
-      if (!C->getValue().isStrictlyPositive()) continue;
+/// getUse - Return an LSRUse index and an offset value for a fixup which
+/// needs the given expression, with the given kind and optional access type.
+/// Either reuse an existing use or create a new one, as needed.
+std::pair<size_t, int64_t>
+LSRInstance::getUse(const SCEV *&Expr,
+                    LSRUse::KindType Kind, const Type *AccessTy) {
+  const SCEV *Copy = Expr;
+  int64_t Offset = ExtractImmediate(Expr, SE);
+
+  // Basic uses can't accept any offset, for example.
+  if (!isAlwaysFoldable(Offset, 0, /*HasBaseReg=*/true,
+                        Kind, AccessTy, TLI, SE)) {
+    Expr = Copy;
+    Offset = 0;
+  }
-      /* Add new PHINode. */
-      PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH);
+  std::pair<UseMapTy::iterator, bool> P =
+    UseMap.insert(std::make_pair(Expr, 0));
+  if (!P.second) {
+    // A use already existed with this base.
+    size_t LUIdx = P.first->second;
+    LSRUse &LU = Uses[LUIdx];
+    if (reconcileNewOffset(LU, Offset, Kind, AccessTy))
+      // Reuse this use.
+      return std::make_pair(LUIdx, Offset);
+  }
-      /* create new increment. '++d' in above example. */
-      Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
-      BinaryOperator *NewIncr =
-        BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ?
-                               Instruction::FAdd : Instruction::FSub,
-                               NewPH, CFP, "IV.S.next.", Incr);
+  // Create a new use.
+  size_t LUIdx = Uses.size();
+  P.first->second = LUIdx;
+  Uses.push_back(LSRUse(Kind, AccessTy));
+  LSRUse &LU = Uses[LUIdx];
-      NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry));
-      NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch));
+  // We don't need to track redundant offsets, but we don't need to go out
+  // of our way here to avoid them.
+  if (LU.Offsets.empty() || Offset != LU.Offsets.back())
+    LU.Offsets.push_back(Offset);
-      /* Remove cast operation */
-      ShadowUse->replaceAllUsesWith(NewPH);
-      ShadowUse->eraseFromParent();
-      NumShadow++;
-      break;
+  LU.MinOffset = Offset;
+  LU.MaxOffset = Offset;
+  return std::make_pair(LUIdx, Offset);
+}
+
+void LSRInstance::CollectInterestingTypesAndFactors() {
+  SmallSetVector<const SCEV *, 4> Strides;
+
+  // Collect interesting types and factors.
+  for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
+    const SCEV *Stride = UI->getStride();
+
+    // Collect interesting types.
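// A simplified sketch of the use-sharing idea in getUse and
// reconcileNewOffset above: strip the constant offset off each expression
// and key on the remainder, so fixups that differ only by a constant share
// one use whose [MinOffset, MaxOffset] range covers them all. The types
// here are illustrative stand-ins, not the pass's real data structures.
#include <algorithm>
#include <cstdint>
#include <map>
#include <string>

struct UseRangeSketch { int64_t MinOffset, MaxOffset; };

inline void addFixupSketch(std::map<std::string, UseRangeSketch> &UseMap,
                           const std::string &BaseExpr, int64_t Offset) {
  auto It = UseMap.find(BaseExpr);
  if (It == UseMap.end()) {
    UseMap[BaseExpr] = UseRangeSketch{Offset, Offset};  // a new use
    return;
  }
  // Widen the existing range, as reconcileNewOffset does after verifying
  // that the widened offset would still be foldable for the use's kind.
  It->second.MinOffset = std::min(It->second.MinOffset, Offset);
  It->second.MaxOffset = std::max(It->second.MaxOffset, Offset);
}
// e.g. loads from p+0 and p+8 become one use with the range [0, 8].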
+ Types.insert(SE.getEffectiveSCEVType(Stride->getType())); + + // Collect interesting factors. + for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter = + Strides.begin(), SEnd = Strides.end(); NewStrideIter != SEnd; + ++NewStrideIter) { + const SCEV *OldStride = Stride; + const SCEV *NewStride = *NewStrideIter; + if (OldStride == NewStride) + continue; + + if (SE.getTypeSizeInBits(OldStride->getType()) != + SE.getTypeSizeInBits(NewStride->getType())) { + if (SE.getTypeSizeInBits(OldStride->getType()) > + SE.getTypeSizeInBits(NewStride->getType())) + NewStride = SE.getSignExtendExpr(NewStride, OldStride->getType()); + else + OldStride = SE.getSignExtendExpr(OldStride, NewStride->getType()); + } + if (const SCEVConstant *Factor = + dyn_cast_or_null<SCEVConstant>(getSDiv(NewStride, OldStride, + SE, true))) { + if (Factor->getValue()->getValue().getMinSignedBits() <= 64) + Factors.insert(Factor->getValue()->getValue().getSExtValue()); + } else if (const SCEVConstant *Factor = + dyn_cast_or_null<SCEVConstant>(getSDiv(OldStride, NewStride, + SE, true))) { + if (Factor->getValue()->getValue().getMinSignedBits() <= 64) + Factors.insert(Factor->getValue()->getValue().getSExtValue()); + } } + Strides.insert(Stride); } -} -/// OptimizeIndvars - Now that IVUsesByStride is set up with all of the indvar -/// uses in the loop, look to see if we can eliminate some, in favor of using -/// common indvars for the different uses. -void LoopStrengthReduce::OptimizeIndvars(Loop *L) { - // TODO: implement optzns here. + // If all uses use the same type, don't bother looking for truncation-based + // reuse. + if (Types.size() == 1) + Types.clear(); - OptimizeShadowIV(L); + DEBUG(print_factors_and_types(dbgs())); } -bool LoopStrengthReduce::StrideMightBeShared(const SCEV* Stride, Loop *L, - bool CheckPreInc) { - int64_t SInt = cast<SCEVConstant>(Stride)->getValue()->getSExtValue(); - for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) { - std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = - IU->IVUsesByStride.find(IU->StrideOrder[i]); - const SCEV *Share = SI->first; - if (!isa<SCEVConstant>(SI->first) || Share == Stride) - continue; - int64_t SSInt = cast<SCEVConstant>(Share)->getValue()->getSExtValue(); - if (SSInt == SInt) - return true; // This can definitely be reused. - if (unsigned(abs64(SSInt)) < SInt || (SSInt % SInt) != 0) - continue; - int64_t Scale = SSInt / SInt; - bool AllUsesAreAddresses = true; - bool AllUsesAreOutsideLoop = true; - std::vector<BasedUser> UsersToProcess; - const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L, - AllUsesAreAddresses, - AllUsesAreOutsideLoop, - UsersToProcess); - if (AllUsesAreAddresses && - ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess)) { - if (!CheckPreInc) - return true; - // Any pre-inc iv use? - IVUsersOfOneStride &StrideUses = *IU->IVUsesByStride[Share]; - for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(), - E = StrideUses.Users.end(); I != E; ++I) { - if (!I->isUseOfPostIncrementedValue()) - return true; +void LSRInstance::CollectFixupsAndInitialFormulae() { + for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) { + // Record the uses. 
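// A small sketch of the factor collection above, with hypothetical
// constant strides standing in for the SCEV-based getSDiv logic: whenever
// one stride evenly divides another, the quotient is an interesting scale
// for the formula-generation steps to try.
#include <cstdint>
#include <set>

inline void collectFactorsSketch(const std::set<int64_t> &Strides,
                                 std::set<int64_t> &Factors) {
  for (int64_t A : Strides)
    for (int64_t B : Strides)
      if (A != B && B != 0 && A % B == 0)
        Factors.insert(A / B);  // e.g. strides {4, 8, 16} yield {2, 4}
}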
+ LSRFixup &LF = getNewFixup(); + LF.UserInst = UI->getUser(); + LF.OperandValToReplace = UI->getOperandValToReplace(); + if (UI->isUseOfPostIncrementedValue()) + LF.PostIncLoop = L; + + LSRUse::KindType Kind = LSRUse::Basic; + const Type *AccessTy = 0; + if (isAddressUse(LF.UserInst, LF.OperandValToReplace)) { + Kind = LSRUse::Address; + AccessTy = getAccessType(LF.UserInst); + } + + const SCEV *S = IU.getCanonicalExpr(*UI); + + // Equality (== and !=) ICmps are special. We can rewrite (i == N) as + // (N - i == 0), and this allows (N - i) to be the expression that we work + // with rather than just N or i, so we can consider the register + // requirements for both N and i at the same time. Limiting this code to + // equality icmps is not a problem because all interesting loops use + // equality icmps, thanks to IndVarSimplify. + if (ICmpInst *CI = dyn_cast<ICmpInst>(LF.UserInst)) + if (CI->isEquality()) { + // Swap the operands if needed to put the OperandValToReplace on the + // left, for consistency. + Value *NV = CI->getOperand(1); + if (NV == LF.OperandValToReplace) { + CI->setOperand(1, CI->getOperand(0)); + CI->setOperand(0, NV); + } + + // x == y --> x - y == 0 + const SCEV *N = SE.getSCEV(NV); + if (N->isLoopInvariant(L)) { + Kind = LSRUse::ICmpZero; + S = SE.getMinusSCEV(N, S); + } + + // -1 and the negations of all interesting strides (except the negation + // of -1) are now also interesting. + for (size_t i = 0, e = Factors.size(); i != e; ++i) + if (Factors[i] != -1) + Factors.insert(-(uint64_t)Factors[i]); + Factors.insert(-1); } + + // Set up the initial formula for this use. + std::pair<size_t, int64_t> P = getUse(S, Kind, AccessTy); + LF.LUIdx = P.first; + LF.Offset = P.second; + LSRUse &LU = Uses[LF.LUIdx]; + LU.AllFixupsOutsideLoop &= !L->contains(LF.UserInst); + + // If this is the first use of this LSRUse, give it a formula. + if (LU.Formulae.empty()) { + InsertInitialFormula(S, L, LU, LF.LUIdx); + CountRegisters(LU.Formulae.back(), LF.LUIdx); } } - return false; + + DEBUG(print_fixups(dbgs())); } -/// isUsedByExitBranch - Return true if icmp is used by a loop terminating -/// conditional branch or it's and / or with other conditions before being used -/// as the condition. -static bool isUsedByExitBranch(ICmpInst *Cond, Loop *L) { - BasicBlock *CondBB = Cond->getParent(); - if (!L->isLoopExiting(CondBB)) - return false; - BranchInst *TermBr = dyn_cast<BranchInst>(CondBB->getTerminator()); - if (!TermBr || !TermBr->isConditional()) +void +LSRInstance::InsertInitialFormula(const SCEV *S, Loop *L, + LSRUse &LU, size_t LUIdx) { + Formula F; + F.InitialMatch(S, L, SE, DT); + bool Inserted = InsertFormula(LU, LUIdx, F); + assert(Inserted && "Initial formula already exists!"); (void)Inserted; +} + +void +LSRInstance::InsertSupplementalFormula(const SCEV *S, + LSRUse &LU, size_t LUIdx) { + Formula F; + F.BaseRegs.push_back(S); + F.AM.HasBaseReg = true; + bool Inserted = InsertFormula(LU, LUIdx, F); + assert(Inserted && "Supplemental formula already exists!"); (void)Inserted; +} + +/// CountRegisters - Note which registers are used by the given formula, +/// updating RegUses. +void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) { + if (F.ScaledReg) + RegUses.CountRegister(F.ScaledReg, LUIdx); + for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(), + E = F.BaseRegs.end(); I != E; ++I) + RegUses.CountRegister(*I, LUIdx); +} + +/// InsertFormula - If the given formula has not yet been inserted, add it to +/// the list, and return true. 
Return false otherwise. +bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) { + if (!LU.InsertFormula(LUIdx, F)) return false; - Value *User = *Cond->use_begin(); - Instruction *UserInst = dyn_cast<Instruction>(User); - while (UserInst && - (UserInst->getOpcode() == Instruction::And || - UserInst->getOpcode() == Instruction::Or)) { - if (!UserInst->hasOneUse() || UserInst->getParent() != CondBB) - return false; - User = *User->use_begin(); - UserInst = dyn_cast<Instruction>(User); + CountRegisters(F, LUIdx); + return true; +} + +/// CollectLoopInvariantFixupsAndFormulae - Check for other uses of +/// loop-invariant values which we're tracking. These other uses will pin these +/// values in registers, making them less profitable for elimination. +/// TODO: This currently misses non-constant addrec step registers. +/// TODO: Should this give more weight to users inside the loop? +void +LSRInstance::CollectLoopInvariantFixupsAndFormulae() { + SmallVector<const SCEV *, 8> Worklist(RegUses.begin(), RegUses.end()); + SmallPtrSet<const SCEV *, 8> Inserted; + + while (!Worklist.empty()) { + const SCEV *S = Worklist.pop_back_val(); + + if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S)) + Worklist.insert(Worklist.end(), N->op_begin(), N->op_end()); + else if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) + Worklist.push_back(C->getOperand()); + else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) { + Worklist.push_back(D->getLHS()); + Worklist.push_back(D->getRHS()); + } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { + if (!Inserted.insert(U)) continue; + const Value *V = U->getValue(); + if (const Instruction *Inst = dyn_cast<Instruction>(V)) + if (L->contains(Inst)) continue; + for (Value::use_const_iterator UI = V->use_begin(), UE = V->use_end(); + UI != UE; ++UI) { + const Instruction *UserInst = dyn_cast<Instruction>(*UI); + // Ignore non-instructions. + if (!UserInst) + continue; + // Ignore instructions in other functions (as can happen with + // Constants). + if (UserInst->getParent()->getParent() != L->getHeader()->getParent()) + continue; + // Ignore instructions not dominated by the loop. + const BasicBlock *UseBB = !isa<PHINode>(UserInst) ? + UserInst->getParent() : + cast<PHINode>(UserInst)->getIncomingBlock( + PHINode::getIncomingValueNumForOperand(UI.getOperandNo())); + if (!DT.dominates(L->getHeader(), UseBB)) + continue; + // Ignore uses which are part of other SCEV expressions, to avoid + // analyzing them multiple times. + if (SE.isSCEVable(UserInst->getType()) && + !isa<SCEVUnknown>(SE.getSCEV(const_cast<Instruction *>(UserInst)))) + continue; + // Ignore icmp instructions which are already being analyzed. 
+ if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) { + unsigned OtherIdx = !UI.getOperandNo(); + Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx)); + if (SE.getSCEV(OtherOp)->hasComputableLoopEvolution(L)) + continue; + } + + LSRFixup &LF = getNewFixup(); + LF.UserInst = const_cast<Instruction *>(UserInst); + LF.OperandValToReplace = UI.getUse(); + std::pair<size_t, int64_t> P = getUse(S, LSRUse::Basic, 0); + LF.LUIdx = P.first; + LF.Offset = P.second; + LSRUse &LU = Uses[LF.LUIdx]; + LU.AllFixupsOutsideLoop &= L->contains(LF.UserInst); + InsertSupplementalFormula(U, LU, LF.LUIdx); + CountRegisters(LU.Formulae.back(), Uses.size() - 1); + break; + } + } } - return User == TermBr; } -static bool ShouldCountToZero(ICmpInst *Cond, IVStrideUse* &CondUse, - ScalarEvolution *SE, Loop *L, - const TargetLowering *TLI = 0) { - if (!L->contains(Cond)) - return false; +/// CollectSubexprs - Split S into subexpressions which can be pulled out into +/// separate registers. If C is non-null, multiply each subexpression by C. +static void CollectSubexprs(const SCEV *S, const SCEVConstant *C, + SmallVectorImpl<const SCEV *> &Ops, + ScalarEvolution &SE) { + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + // Break out add operands. + for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); + I != E; ++I) + CollectSubexprs(*I, C, Ops, SE); + return; + } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { + // Split a non-zero base out of an addrec. + if (!AR->getStart()->isZero()) { + CollectSubexprs(SE.getAddRecExpr(SE.getIntegerSCEV(0, AR->getType()), + AR->getStepRecurrence(SE), + AR->getLoop()), C, Ops, SE); + CollectSubexprs(AR->getStart(), C, Ops, SE); + return; + } + } else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) { + // Break (C * (a + b + c)) into C*a + C*b + C*c. + if (Mul->getNumOperands() == 2) + if (const SCEVConstant *Op0 = + dyn_cast<SCEVConstant>(Mul->getOperand(0))) { + CollectSubexprs(Mul->getOperand(1), + C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0, + Ops, SE); + return; + } + } - if (!isa<SCEVConstant>(CondUse->getOffset())) - return false; + // Otherwise use the value itself. + Ops.push_back(C ? SE.getMulExpr(C, S) : S); +} - // Handle only tests for equality for the moment. - if (!Cond->isEquality() || !Cond->hasOneUse()) - return false; - if (!isUsedByExitBranch(Cond, L)) - return false; +/// GenerateReassociations - Split out subexpressions from adds and the bases of +/// addrecs. +void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, + Formula Base, + unsigned Depth) { + // Arbitrarily cap recursion to protect compile time. + if (Depth >= 3) return; + + for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) { + const SCEV *BaseReg = Base.BaseRegs[i]; + + SmallVector<const SCEV *, 8> AddOps; + CollectSubexprs(BaseReg, 0, AddOps, SE); + if (AddOps.size() == 1) continue; + + for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(), + JE = AddOps.end(); J != JE; ++J) { + // Don't pull a constant into a register if the constant could be folded + // into an immediate field. + if (isAlwaysFoldable(*J, LU.MinOffset, LU.MaxOffset, + Base.getNumRegs() > 1, + LU.Kind, LU.AccessTy, TLI, SE)) + continue; - Value *CondOp0 = Cond->getOperand(0); - const SCEV *IV = SE->getSCEV(CondOp0); - const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV); - if (!AR || !AR->isAffine()) - return false; + // Collect all operands except *J. 
+      SmallVector<const SCEV *, 8> InnerAddOps;
+      for (SmallVectorImpl<const SCEV *>::const_iterator K = AddOps.begin(),
+           KE = AddOps.end(); K != KE; ++K)
+        if (K != J)
+          InnerAddOps.push_back(*K);
+
+      // Don't leave just a constant behind in a register if the constant could
+      // be folded into an immediate field.
+      if (InnerAddOps.size() == 1 &&
+          isAlwaysFoldable(InnerAddOps[0], LU.MinOffset, LU.MaxOffset,
+                           Base.getNumRegs() > 1,
+                           LU.Kind, LU.AccessTy, TLI, SE))
+        continue;
-  const SCEVConstant *SC = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
-  if (!SC || SC->getValue()->getSExtValue() < 0)
-    // If it's already counting down, don't do anything.
-    return false;
+      Formula F = Base;
+      F.BaseRegs[i] = SE.getAddExpr(InnerAddOps);
+      F.BaseRegs.push_back(*J);
+      if (InsertFormula(LU, LUIdx, F))
+        // If that formula hadn't been seen before, recurse to find more like
+        // it.
+        GenerateReassociations(LU, LUIdx, LU.Formulae.back(), Depth+1);
+    }
+  }
+}
-  // If the RHS of the comparison is not an loop invariant, the rewrite
-  // cannot be done. Also bail out if it's already comparing against a zero.
-  // If we are checking this before cmp stride optimization, check if it's
-  // comparing against a already legal immediate.
-  Value *RHS = Cond->getOperand(1);
-  ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS);
-  if (!L->isLoopInvariant(RHS) ||
-      (RHSC && RHSC->isZero()) ||
-      (RHSC && TLI && TLI->isLegalICmpImmediate(RHSC->getSExtValue())))
-    return false;
+/// GenerateCombinations - Generate a formula consisting of all of the
+/// loop-dominating registers added into a single register.
+void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
+                                       Formula Base) {
+  // This method is only interesting on a plurality of registers.
+  if (Base.BaseRegs.size() <= 1) return;
+
+  Formula F = Base;
+  F.BaseRegs.clear();
+  SmallVector<const SCEV *, 4> Ops;
+  for (SmallVectorImpl<const SCEV *>::const_iterator
+       I = Base.BaseRegs.begin(), E = Base.BaseRegs.end(); I != E; ++I) {
+    const SCEV *BaseReg = *I;
+    if (BaseReg->properlyDominates(L->getHeader(), &DT) &&
+        !BaseReg->hasComputableLoopEvolution(L))
+      Ops.push_back(BaseReg);
+    else
+      F.BaseRegs.push_back(BaseReg);
+  }
+  if (Ops.size() > 1) {
+    const SCEV *Sum = SE.getAddExpr(Ops);
+    // TODO: If Sum is zero, it probably means ScalarEvolution missed an
+    // opportunity to fold something. For now, just ignore such cases
+    // rather than proceed with zero in a register.
+    if (!Sum->isZero()) {
+      F.BaseRegs.push_back(Sum);
+      (void)InsertFormula(LU, LUIdx, F);
+    }
+  }
+}
-  // Make sure the IV is only used for counting. Value may be preinc or
-  // postinc; 2 uses in either case.
-  if (!CondOp0->hasNUses(2))
-    return false;
-
-  return true;
+/// GenerateSymbolicOffsets - Generate reuse formulae using symbolic offsets.
+void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
+                                          Formula Base) {
+  // We can't add a symbolic offset if the address already contains one.
+  if (Base.AM.BaseGV) return;
+
+  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
+    const SCEV *G = Base.BaseRegs[i];
+    GlobalValue *GV = ExtractSymbol(G, SE);
+    if (G->isZero() || !GV)
+      continue;
+    Formula F = Base;
+    F.AM.BaseGV = GV;
+    if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset,
+                    LU.Kind, LU.AccessTy, TLI))
+      continue;
+    F.BaseRegs[i] = G;
+    (void)InsertFormula(LU, LUIdx, F);
+  }
+}
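// A toy sketch of the CollectSubexprs/GenerateReassociations idea above:
// add expressions are flattened so each operand can become its own base
// register. The node type is a simplified stand-in for SCEV, with string
// leaves standing in for values and constants.
#include <string>
#include <vector>

struct ExprSketch {
  std::string Leaf;                        // set when this is a leaf
  std::vector<const ExprSketch *> AddOps;  // non-empty => an add node
};

inline void collectSubexprsSketch(const ExprSketch *S,
                                  std::vector<const ExprSketch *> &Ops) {
  if (!S->AddOps.empty()) {
    for (const ExprSketch *Op : S->AddOps)  // break out add operands
      collectSubexprsSketch(Op, Ops);
    return;
  }
  Ops.push_back(S);  // not an add: use the value itself
}
// (a + b + 16) yields the candidates a, b, and 16; the reassociation code
// then rejects splits where a lone constant could instead fold into the
// use's immediate field (the isAlwaysFoldable checks).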
-/// OptimizeLoopTermCond - Change loop terminating condition to use the
-/// postinc iv when possible.
-void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
-  BasicBlock *LatchBlock = L->getLoopLatch();
-  bool LatchExit = L->isLoopExiting(LatchBlock);
-  SmallVector<BasicBlock*, 8> ExitingBlocks;
-  L->getExitingBlocks(ExitingBlocks);
+/// GenerateConstantOffsets - Generate reuse formulae using constant offsets.
+void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
+                                          Formula Base) {
+  // TODO: For now, just add the min and max offset, because it usually isn't
+  // worthwhile looking at everything in between.
+  SmallVector<int64_t, 4> Worklist;
+  Worklist.push_back(LU.MinOffset);
+  if (LU.MaxOffset != LU.MinOffset)
+    Worklist.push_back(LU.MaxOffset);
+
+  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
+    const SCEV *G = Base.BaseRegs[i];
+
+    for (SmallVectorImpl<int64_t>::const_iterator I = Worklist.begin(),
+         E = Worklist.end(); I != E; ++I) {
+      Formula F = Base;
+      F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs - *I;
+      if (isLegalUse(F.AM, LU.MinOffset - *I, LU.MaxOffset - *I,
+                     LU.Kind, LU.AccessTy, TLI)) {
+        F.BaseRegs[i] = SE.getAddExpr(G, SE.getIntegerSCEV(*I, G->getType()));
+
+        (void)InsertFormula(LU, LUIdx, F);
+      }
+    }
-  for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
-    BasicBlock *ExitingBlock = ExitingBlocks[i];
+    int64_t Imm = ExtractImmediate(G, SE);
+    if (G->isZero() || Imm == 0)
+      continue;
+    Formula F = Base;
+    F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Imm;
+    if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset,
+                    LU.Kind, LU.AccessTy, TLI))
+      continue;
+    F.BaseRegs[i] = G;
+    (void)InsertFormula(LU, LUIdx, F);
+  }
+}
-    // Finally, get the terminating condition for the loop if possible. If we
-    // can, we want to change it to use a post-incremented version of its
-    // induction variable, to allow coalescing the live ranges for the IV into
-    // one register value.
+/// GenerateICmpZeroScales - For ICmpZero, check to see if we can scale up
+/// the comparison. For example, x == y -> x*c == y*c.
+void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
+                                         Formula Base) {
+  if (LU.Kind != LSRUse::ICmpZero) return;
-    BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
-    if (!TermBr)
+  // Determine the integer type for the base formula.
+  const Type *IntTy = Base.getType();
+  if (!IntTy) return;
+  if (SE.getTypeSizeInBits(IntTy) > 64) return;
+
+  // Don't do this if there is more than one offset.
+  if (LU.MinOffset != LU.MaxOffset) return;
+
+  assert(!Base.AM.BaseGV && "ICmpZero use is not legal!");
+
+  // Check each interesting stride.
+  for (SmallSetVector<int64_t, 8>::const_iterator
+       I = Factors.begin(), E = Factors.end(); I != E; ++I) {
+    int64_t Factor = *I;
+    Formula F = Base;
+
+    // Check that the multiplication doesn't overflow.
+    F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs * Factor;
+    if ((int64_t)F.AM.BaseOffs / Factor != Base.AM.BaseOffs)
       continue;
-    // FIXME: Overly conservative, termination condition could be an 'or' etc..
-    if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
+    // Check that multiplying with the use offset doesn't overflow.
+    int64_t Offset = LU.MinOffset;
+    Offset = (uint64_t)Offset * Factor;
+    if ((int64_t)Offset / Factor != LU.MinOffset)
       continue;
-    // Search IVUsesByStride to find Cond's IVUse if there is one.
-    IVStrideUse *CondUse = 0;
-    const SCEV *CondStride = 0;
-    ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
-    if (!FindIVUserForCond(Cond, CondUse, CondStride))
+    // Check that this scale is legal.
+ if (!isLegalUse(F.AM, Offset, Offset, LU.Kind, LU.AccessTy, TLI)) continue; - // If the latch block is exiting and it's not a single block loop, it's - // not safe to use postinc iv in other exiting blocks. FIXME: overly - // conservative? How about icmp stride optimization? - bool UsePostInc = !(e > 1 && LatchExit && ExitingBlock != LatchBlock); - if (UsePostInc && ExitingBlock != LatchBlock) { - if (!Cond->hasOneUse()) - // See below, we don't want the condition to be cloned. - UsePostInc = false; - else { - // If exiting block is the latch block, we know it's safe and profitable - // to transform the icmp to use post-inc iv. Otherwise do so only if it - // would not reuse another iv and its iv would be reused by other uses. - // We are optimizing for the case where the icmp is the only use of the - // iv. - IVUsersOfOneStride &StrideUses = *IU->IVUsesByStride[CondStride]; - for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(), - E = StrideUses.Users.end(); I != E; ++I) { - if (I->getUser() == Cond) - continue; - if (!I->isUseOfPostIncrementedValue()) { - UsePostInc = false; - break; - } + // Compensate for the use having MinOffset built into it. + F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Offset - LU.MinOffset; + + const SCEV *FactorS = SE.getIntegerSCEV(Factor, IntTy); + + // Check that multiplying with each base register doesn't overflow. + for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) { + F.BaseRegs[i] = SE.getMulExpr(F.BaseRegs[i], FactorS); + if (getSDiv(F.BaseRegs[i], FactorS, SE) != Base.BaseRegs[i]) + goto next; + } + + // Check that multiplying with the scaled register doesn't overflow. + if (F.ScaledReg) { + F.ScaledReg = SE.getMulExpr(F.ScaledReg, FactorS); + if (getSDiv(F.ScaledReg, FactorS, SE) != Base.ScaledReg) + continue; + } + + // If we make it here and it's legal, add it. + (void)InsertFormula(LU, LUIdx, F); + next:; + } +} + +/// GenerateScales - Generate stride factor reuse formulae by making use of +/// scaled-offset address modes, for example. +void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, + Formula Base) { + // Determine the integer type for the base formula. + const Type *IntTy = Base.getType(); + if (!IntTy) return; + + // If this Formula already has a scaled register, we can't add another one. + if (Base.AM.Scale != 0) return; + + // Check each interesting stride. + for (SmallSetVector<int64_t, 8>::const_iterator + I = Factors.begin(), E = Factors.end(); I != E; ++I) { + int64_t Factor = *I; + + Base.AM.Scale = Factor; + Base.AM.HasBaseReg = Base.BaseRegs.size() > 1; + // Check whether this scale is going to be legal. + if (!isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset, + LU.Kind, LU.AccessTy, TLI)) { + // As a special-case, handle special out-of-loop Basic users specially. + // TODO: Reconsider this special case. + if (LU.Kind == LSRUse::Basic && + isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset, + LSRUse::Special, LU.AccessTy, TLI) && + LU.AllFixupsOutsideLoop) + LU.Kind = LSRUse::Special; + else + continue; + } + // For an ICmpZero, negating a solitary base register won't lead to + // new solutions. + if (LU.Kind == LSRUse::ICmpZero && + !Base.AM.HasBaseReg && Base.AM.BaseOffs == 0 && !Base.AM.BaseGV) + continue; + // For each addrec base reg, apply the scale, if possible. 
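// A minimal sketch of why the ICmpZero rescaling above is sound: a
// compare against zero is invariant under multiplying both sides by a
// nonzero factor, so the whole formula can be rescaled to match a stride
// already in use. X and Factor are hypothetical values; the real code
// also re-checks legality and guards each multiplication for overflow.
#include <cstdint>

inline bool zeroTestUnchanged(int64_t X, int64_t Factor) {
  bool Before = (X == 0);
  // Multiply in unsigned arithmetic, mirroring the patch's own idiom for
  // avoiding signed-overflow UB; assumes Factor != 0 and no overflow.
  bool After = ((int64_t)((uint64_t)X * (uint64_t)Factor) == 0);
  return Before == After;  // always true under those assumptions
}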
+    for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
+      if (const SCEVAddRecExpr *AR =
+            dyn_cast<SCEVAddRecExpr>(Base.BaseRegs[i])) {
+        const SCEV *FactorS = SE.getIntegerSCEV(Factor, IntTy);
+        if (FactorS->isZero())
+          continue;
+        // Divide out the factor, ignoring high bits, since we'll be
+        // scaling the value back up in the end.
+        if (const SCEV *Quotient = getSDiv(AR, FactorS, SE, true)) {
+          // TODO: This could be optimized to avoid all the copying.
+          Formula F = Base;
+          F.ScaledReg = Quotient;
+          std::swap(F.BaseRegs[i], F.BaseRegs.back());
+          F.BaseRegs.pop_back();
+          (void)InsertFormula(LU, LUIdx, F);
         }
       }
+  }
+}
-      // If iv for the stride might be shared and any of the users use pre-inc
-      // iv might be used, then it's not safe to use post-inc iv.
-      if (UsePostInc &&
-          isa<SCEVConstant>(CondStride) &&
-          StrideMightBeShared(CondStride, L, true))
-        UsePostInc = false;
-    }
+/// GenerateTruncates - Generate reuse formulae from different IV types.
+void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx,
+                                    Formula Base) {
+  // This requires TargetLowering to tell us which truncates are free.
+  if (!TLI) return;
+
+  // Don't bother truncating symbolic values.
+  if (Base.AM.BaseGV) return;
+
+  // Determine the integer type for the base formula.
+  const Type *DstTy = Base.getType();
+  if (!DstTy) return;
+  DstTy = SE.getEffectiveSCEVType(DstTy);
+
+  for (SmallSetVector<const Type *, 4>::const_iterator
+       I = Types.begin(), E = Types.end(); I != E; ++I) {
+    const Type *SrcTy = *I;
+    if (SrcTy != DstTy && TLI->isTruncateFree(SrcTy, DstTy)) {
+      Formula F = Base;
+
+      if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, *I);
+      for (SmallVectorImpl<const SCEV *>::iterator J = F.BaseRegs.begin(),
+           JE = F.BaseRegs.end(); J != JE; ++J)
+        *J = SE.getAnyExtendExpr(*J, SrcTy);
+
+      // TODO: This assumes we've done basic processing on all uses and
+      // have an idea what the register usage is.
+      if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
+        continue;
-    // If the trip count is computed in terms of a max (due to ScalarEvolution
-    // being unable to find a sufficient guard, for example), change the loop
-    // comparison to use SLT or ULT instead of NE.
-    Cond = OptimizeMax(L, Cond, CondUse);
-
-    // If possible, change stride and operands of the compare instruction to
-    // eliminate one stride. However, avoid rewriting the compare instruction
-    // with an iv of new stride if it's likely the new stride uses will be
-    // rewritten using the stride of the compare instruction.
-    if (ExitingBlock == LatchBlock && isa<SCEVConstant>(CondStride)) {
-      // If the condition stride is a constant and it's the only use, we might
-      // want to optimize it first by turning it to count toward zero.
-      if (!StrideMightBeShared(CondStride, L, false) &&
-          !ShouldCountToZero(Cond, CondUse, SE, L, TLI))
-        Cond = ChangeCompareStride(L, Cond, CondUse, CondStride);
+      (void)InsertFormula(LU, LUIdx, F);
     }
+  }
+}
+
+namespace {
+
+/// WorkItem - Helper class for GenerateCrossUseConstantOffsets. It's used to
+/// defer modifications so that the search phase doesn't have to worry about
+/// the data structures moving underneath it.
+struct WorkItem {
+  size_t LUIdx;
+  int64_t Imm;
+  const SCEV *OrigReg;
+
+  WorkItem(size_t LI, int64_t I, const SCEV *R)
+    : LUIdx(LI), Imm(I), OrigReg(R) {}
+
+  void print(raw_ostream &OS) const;
+  void dump() const;
+};
+
+}
+
+void WorkItem::print(raw_ostream &OS) const {
+  OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
+     << " , add offset " << Imm;
+}
+
+void WorkItem::dump() const {
+  print(errs()); errs() << '\n';
+}
+
+/// GenerateCrossUseConstantOffsets - Look for registers which are a constant
+/// distance apart and try to form reuse opportunities between them.
+void LSRInstance::GenerateCrossUseConstantOffsets() {
+  // Group the registers by their value without any added constant offset.
+  typedef std::map<int64_t, const SCEV *> ImmMapTy;
+  typedef DenseMap<const SCEV *, ImmMapTy> RegMapTy;
+  RegMapTy Map;
+  DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
+  SmallVector<const SCEV *, 8> Sequence;
+  for (RegUseTracker::const_iterator I = RegUses.begin(), E = RegUses.end();
+       I != E; ++I) {
+    const SCEV *Reg = *I;
+    int64_t Imm = ExtractImmediate(Reg, SE);
+    std::pair<RegMapTy::iterator, bool> Pair =
+      Map.insert(std::make_pair(Reg, ImmMapTy()));
+    if (Pair.second)
+      Sequence.push_back(Reg);
+    Pair.first->second.insert(std::make_pair(Imm, *I));
+    UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(*I);
+  }
-    if (!UsePostInc)
+  // Now examine each set of registers with the same base value. Build up
+  // a list of work to do and do the work in a separate step so that we're
+  // not adding formulae and register counts while we're searching.
+  SmallVector<WorkItem, 32> WorkItems;
+  SmallSet<std::pair<size_t, int64_t>, 32> UniqueItems;
+  for (SmallVectorImpl<const SCEV *>::const_iterator I = Sequence.begin(),
+       E = Sequence.end(); I != E; ++I) {
+    const SCEV *Reg = *I;
+    const ImmMapTy &Imms = Map.find(Reg)->second;
+
+    // It's not worthwhile looking for reuse if there's only one offset.
+    if (Imms.size() == 1)
       continue;
-    DEBUG(dbgs() << "  Change loop exiting icmp to use postinc iv: "
-          << *Cond << '\n');
+    DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':';
+          for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
+               J != JE; ++J)
+            dbgs() << ' ' << J->first;
+          dbgs() << '\n');
-    // It's possible for the setcc instruction to be anywhere in the loop, and
-    // possible for it to have multiple users. If it is not immediately before
-    // the exiting block branch, move it.
-    if (&*++BasicBlock::iterator(Cond) != (Instruction*)TermBr) {
-      if (Cond->hasOneUse()) {   // Condition has a single use, just move it.
-        Cond->moveBefore(TermBr);
-      } else {
-        // Otherwise, clone the terminating condition and insert into the
-        // loopend.
-        Cond = cast<ICmpInst>(Cond->clone());
-        Cond->setName(L->getHeader()->getName() + ".termcond");
-        ExitingBlock->getInstList().insert(TermBr, Cond);
+    // Examine each offset.
+    for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
+         J != JE; ++J) {
+      const SCEV *OrigReg = J->second;
-        // Clone the IVUse, as the old use still exists!
-        IU->IVUsesByStride[CondStride]->addUser(CondUse->getOffset(), Cond,
-                                          CondUse->getOperandValToReplace());
-        CondUse = &IU->IVUsesByStride[CondStride]->Users.back();
+      int64_t JImm = J->first;
+      const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);
+
+      if (!isa<SCEVConstant>(OrigReg) &&
+          UsedByIndicesMap[Reg].count() == 1) {
+        DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg << '\n');
+        continue;
+      }
+
+      // Conservatively examine offsets between this orig reg and a few
+      // selected other orig regs.
+      ImmMapTy::const_iterator OtherImms[] = {
+        Imms.begin(), prior(Imms.end()),
+        Imms.upper_bound((Imms.begin()->first + prior(Imms.end())->first) / 2)
+      };
+      for (size_t i = 0, e = array_lengthof(OtherImms); i != e; ++i) {
+        ImmMapTy::const_iterator M = OtherImms[i];
+        if (M == J || M == JE) continue;
+
+        // Compute the difference between the two.
+        int64_t Imm = (uint64_t)JImm - M->first;
+        for (int LUIdx = UsedByIndices.find_first(); LUIdx != -1;
+             LUIdx = UsedByIndices.find_next(LUIdx))
+          // Make a memo of this use, offset, and register tuple.
+          if (UniqueItems.insert(std::make_pair(LUIdx, Imm)))
+            WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg));
       }
     }
+  }
-    // If we get to here, we know that we can transform the setcc instruction to
-    // use the post-incremented version of the IV, allowing us to coalesce the
-    // live ranges for the IV correctly.
-    CondUse->setOffset(SE->getMinusSCEV(CondUse->getOffset(), CondStride));
-    CondUse->setIsUseOfPostIncrementedValue(true);
-    Changed = true;
+  Map.clear();
+  Sequence.clear();
+  UsedByIndicesMap.clear();
+  UniqueItems.clear();
+
+  // Now iterate through the worklist and add new formulae.
+  for (SmallVectorImpl<WorkItem>::const_iterator I = WorkItems.begin(),
+       E = WorkItems.end(); I != E; ++I) {
+    const WorkItem &WI = *I;
+    size_t LUIdx = WI.LUIdx;
+    LSRUse &LU = Uses[LUIdx];
+    int64_t Imm = WI.Imm;
+    const SCEV *OrigReg = WI.OrigReg;
+
+    const Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType());
+    const SCEV *NegImmS = SE.getSCEV(ConstantInt::get(IntTy, -(uint64_t)Imm));
+    unsigned BitWidth = SE.getTypeSizeInBits(IntTy);
+
+    // TODO: Use a more targeted data structure.
+    for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
+      Formula F = LU.Formulae[L];
+      // Use the immediate in the scaled register.
+      if (F.ScaledReg == OrigReg) {
+        int64_t Offs = (uint64_t)F.AM.BaseOffs +
+                       Imm * (uint64_t)F.AM.Scale;
+        // Don't create 50 + reg(-50).
+        if (F.referencesReg(SE.getSCEV(
+                   ConstantInt::get(IntTy, -(uint64_t)Offs))))
+          continue;
+        Formula NewF = F;
+        NewF.AM.BaseOffs = Offs;
+        if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset,
+                        LU.Kind, LU.AccessTy, TLI))
+          continue;
+        NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);
+
+        // If the new scale is a constant in a register, and adding the constant
+        // value to the immediate would produce a value closer to zero than the
+        // immediate itself, then the formula isn't worthwhile.
+        if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg))
+          if (C->getValue()->getValue().isNegative() !=
+                (NewF.AM.BaseOffs < 0) &&
+              (C->getValue()->getValue().abs() * APInt(BitWidth, F.AM.Scale))
+                .ule(APInt(BitWidth, NewF.AM.BaseOffs).abs()))
+            continue;
-    ++NumLoopCond;
+        // OK, looks good.
+        (void)InsertFormula(LU, LUIdx, NewF);
+      } else {
+        // Use the immediate in a base register.
+        for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {
+          const SCEV *BaseReg = F.BaseRegs[N];
+          if (BaseReg != OrigReg)
+            continue;
+          Formula NewF = F;
+          NewF.AM.BaseOffs = (uint64_t)NewF.AM.BaseOffs + Imm;
+          if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset,
+                          LU.Kind, LU.AccessTy, TLI))
+            continue;
+          NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg);
+
+          // If the new formula has a constant in a register, and adding the
+          // constant value to the immediate would produce a value closer to
+          // zero than the immediate itself, then the formula isn't worthwhile.
+          for (SmallVectorImpl<const SCEV *>::const_iterator
+               J = NewF.BaseRegs.begin(), JE = NewF.BaseRegs.end();
+               J != JE; ++J)
+            if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*J))
+              if (C->getValue()->getValue().isNegative() !=
+                    (NewF.AM.BaseOffs < 0) &&
+                  C->getValue()->getValue().abs()
+                    .ule(APInt(BitWidth, NewF.AM.BaseOffs).abs()))
+                goto skip_formula;
+
+          // OK, looks good.
+          (void)InsertFormula(LU, LUIdx, NewF);
+          break;
+        skip_formula:;
+        }
+      }
+    }
   }
 }
-bool LoopStrengthReduce::OptimizeLoopCountIVOfStride(const SCEV* &Stride,
-                                                     IVStrideUse* &CondUse,
-                                                     Loop *L) {
-  // If the only use is an icmp of a loop exiting conditional branch, then
-  // attempt the optimization.
-  BasedUser User = BasedUser(*CondUse, SE);
-  assert(isa<ICmpInst>(User.Inst) && "Expecting an ICMPInst!");
-  ICmpInst *Cond = cast<ICmpInst>(User.Inst);
+/// GenerateAllReuseFormulae - Generate formulae for each use.
+void
+LSRInstance::GenerateAllReuseFormulae() {
+  // This is split into multiple loops so that hasRegsUsedByUsesOtherThan
+  // queries are more precise.
+  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+    LSRUse &LU = Uses[LUIdx];
+    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+      GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
+    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+      GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
+  }
+  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+    LSRUse &LU = Uses[LUIdx];
+    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+      GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
+    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+      GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
+    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+      GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
+    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+      GenerateScales(LU, LUIdx, LU.Formulae[i]);
+  }
+  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+    LSRUse &LU = Uses[LUIdx];
+    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+      GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
+  }
-  // Less strict check now that compare stride optimization is done.
-  if (!ShouldCountToZero(Cond, CondUse, SE, L))
-    return false;
+  GenerateCrossUseConstantOffsets();
+}
-  Value *CondOp0 = Cond->getOperand(0);
-  PHINode *PHIExpr = dyn_cast<PHINode>(CondOp0);
-  Instruction *Incr;
-  if (!PHIExpr) {
-    // Value tested is postinc. Find the phi node.
-    Incr = dyn_cast<BinaryOperator>(CondOp0);
-    // FIXME: Just use User.OperandValToReplace here?
-    if (!Incr || Incr->getOpcode() != Instruction::Add)
-      return false;
+/// If there are multiple formulae with the same set of registers used
+/// by other uses, pick the best one and delete the others.
+void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
+#ifndef NDEBUG
+  bool Changed = false;
+#endif
+
+  // Collect the best formula for each unique set of shared registers. This
+  // is reset for each use.
+  typedef DenseMap<SmallVector<const SCEV *, 2>, size_t, UniquifierDenseMapInfo>
+    BestFormulaeTy;
+  BestFormulaeTy BestFormulae;
+
+  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+    LSRUse &LU = Uses[LUIdx];
+    FormulaSorter Sorter(L, LU, SE, DT);
+
+    // Clear out the set of used regs; it will be recomputed.
+    LU.Regs.clear();
+
+    for (size_t FIdx = 0, NumForms = LU.Formulae.size();
+         FIdx != NumForms; ++FIdx) {
+      Formula &F = LU.Formulae[FIdx];
+
+      SmallVector<const SCEV *, 2> Key;
+      for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(),
+           JE = F.BaseRegs.end(); J != JE; ++J) {
+        const SCEV *Reg = *J;
+        if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
+          Key.push_back(Reg);
+      }
+      if (F.ScaledReg &&
+          RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
+        Key.push_back(F.ScaledReg);
+      // Unstable sort by host order ok, because this is only used for
+      // uniquifying.
+      std::sort(Key.begin(), Key.end());
+
+      std::pair<BestFormulaeTy::const_iterator, bool> P =
+        BestFormulae.insert(std::make_pair(Key, FIdx));
+      if (!P.second) {
+        Formula &Best = LU.Formulae[P.first->second];
+        if (Sorter.operator()(F, Best))
+          std::swap(F, Best);
+        DEBUG(dbgs() << "Filtering out "; F.print(dbgs());
+              dbgs() << "\n"
+                        "    in favor of "; Best.print(dbgs());
+              dbgs() << '\n');
+#ifndef NDEBUG
+        Changed = true;
+#endif
+        std::swap(F, LU.Formulae.back());
+        LU.Formulae.pop_back();
+        --FIdx;
+        --NumForms;
+        continue;
+      }
+      if (F.ScaledReg) LU.Regs.insert(F.ScaledReg);
+      LU.Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
+    }
+    BestFormulae.clear();
+  }
-    PHIExpr = dyn_cast<PHINode>(Incr->getOperand(0));
-    if (!PHIExpr)
-      return false;
-    // 1 use for preinc value, the increment.
-    if (!PHIExpr->hasOneUse())
-      return false;
-  } else {
-    assert(isa<PHINode>(CondOp0) &&
-           "Unexpected loop exiting counting instruction sequence!");
-    PHIExpr = cast<PHINode>(CondOp0);
-    // Value tested is preinc. Find the increment.
-    // A CmpInst is not a BinaryOperator; we depend on this.
-    Instruction::use_iterator UI = PHIExpr->use_begin();
-    Incr = dyn_cast<BinaryOperator>(UI);
-    if (!Incr)
-      Incr = dyn_cast<BinaryOperator>(++UI);
-    // One use for postinc value, the phi.  Unnecessarily conservative?
-    if (!Incr || !Incr->hasOneUse() || Incr->getOpcode() != Instruction::Add)
-      return false;
+  DEBUG(if (Changed) {
+          dbgs() << "\n"
+                    "After filtering out undesirable candidates:\n";
+          print_uses(dbgs());
+        });
+}
+
+/// NarrowSearchSpaceUsingHeuristics - If there is an extraordinary number of
+/// formulae to choose from, use some rough heuristics to prune down the number
+/// of formulae. This keeps the main solver from taking an extraordinary amount
+/// of time in some worst-case scenarios.
+void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
+  // This is a rough guess that seems to work fairly well.
+  const size_t Limit = UINT16_MAX;
+
+  SmallPtrSet<const SCEV *, 4> Taken;
+  for (;;) {
+    // Estimate the worst-case number of solutions we might consider. We almost
+    // never consider this many solutions because we prune the search space,
+    // but the pruning isn't always sufficient.
+    uint32_t Power = 1;
+    for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
+         E = Uses.end(); I != E; ++I) {
+      size_t FSize = I->Formulae.size();
+      if (FSize >= Limit) {
+        Power = Limit;
+        break;
+      }
+      Power *= FSize;
+      if (Power >= Limit)
+        break;
+    }
+    if (Power < Limit)
+      break;
+
+    // OK, we have too many formulae on our hands to conveniently handle.
+    // Use a rough heuristic to thin out the list.
+
+    // Pick the register which is used by the most LSRUses, which is likely
+    // to be a good reuse register candidate.
+    const SCEV *Best = 0;
+    unsigned BestNum = 0;
+    for (RegUseTracker::const_iterator I = RegUses.begin(), E = RegUses.end();
+         I != E; ++I) {
+      const SCEV *Reg = *I;
+      if (Taken.count(Reg))
+        continue;
+      if (!Best)
+        Best = Reg;
+      else {
+        unsigned Count = RegUses.getUsedByIndices(Reg).count();
+        if (Count > BestNum) {
+          Best = Reg;
+          BestNum = Count;
+        }
+      }
+    }
+
+    DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
+                 << " will yield profitable reuse.\n");
+    Taken.insert(Best);
+
+    // In any use with formulae which reference this register, delete formulae
+    // which don't reference it.
+    for (SmallVectorImpl<LSRUse>::iterator I = Uses.begin(),
+         E = Uses.end(); I != E; ++I) {
+      LSRUse &LU = *I;
+      if (!LU.Regs.count(Best)) continue;
+
+      // Clear out the set of used regs; it will be recomputed.
+      LU.Regs.clear();
+
+      for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
+        Formula &F = LU.Formulae[i];
+        if (!F.referencesReg(Best)) {
+          DEBUG(dbgs() << "  Deleting "; F.print(dbgs()); dbgs() << '\n');
+          std::swap(LU.Formulae.back(), F);
+          LU.Formulae.pop_back();
+          --e;
+          --i;
+          continue;
+        }
+
+        if (F.ScaledReg) LU.Regs.insert(F.ScaledReg);
+        LU.Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
+      }
+    }
+
+    DEBUG(dbgs() << "After pre-selection:\n";
+          print_uses(dbgs()));
   }
+}
-  // Replace the increment with a decrement.
-  DEBUG(dbgs() << "LSR: Examining use ");
-  DEBUG(WriteAsOperand(dbgs(), CondOp0, /*PrintType=*/false));
-  DEBUG(dbgs() << " in Inst: " << *Cond << '\n');
-  BinaryOperator *Decr = BinaryOperator::Create(Instruction::Sub,
-                         Incr->getOperand(0), Incr->getOperand(1), "tmp", Incr);
-  Incr->replaceAllUsesWith(Decr);
-  Incr->eraseFromParent();
-
-  // Substitute endval-startval for the original startval, and 0 for the
-  // original endval. Since we're only testing for equality this is OK even
-  // if the computation wraps around.
-  BasicBlock *Preheader = L->getLoopPreheader();
-  Instruction *PreInsertPt = Preheader->getTerminator();
-  unsigned InBlock = L->contains(PHIExpr->getIncomingBlock(0)) ? 1 : 0;
-  Value *StartVal = PHIExpr->getIncomingValue(InBlock);
-  Value *EndVal = Cond->getOperand(1);
-  DEBUG(dbgs() << " Optimize loop counting iv to count down ["
-        << *EndVal << " .. " << *StartVal << "]\n");
-
-  // FIXME: check for case where both are constant.
-  Constant* Zero = ConstantInt::get(Cond->getOperand(1)->getType(), 0);
-  BinaryOperator *NewStartVal = BinaryOperator::Create(Instruction::Sub,
-                                EndVal, StartVal, "tmp", PreInsertPt);
-  PHIExpr->setIncomingValue(InBlock, NewStartVal);
-  Cond->setOperand(1, Zero);
-  DEBUG(dbgs() << "  New icmp: " << *Cond << "\n");
-
-  int64_t SInt = cast<SCEVConstant>(Stride)->getValue()->getSExtValue();
-  const SCEV *NewStride = 0;
-  bool Found = false;
-  for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
-    const SCEV *OldStride = IU->StrideOrder[i];
-    if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(OldStride))
-      if (SC->getValue()->getSExtValue() == -SInt) {
-        Found = true;
-        NewStride = OldStride;
+/// SolveRecurse - This is the recursive solver.
+void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
+                               Cost &SolutionCost,
+                               SmallVectorImpl<const Formula *> &Workspace,
+                               const Cost &CurCost,
+                               const SmallPtrSet<const SCEV *, 16> &CurRegs,
+                               DenseSet<const SCEV *> &VisitedRegs) const {
+  // Some ideas:
+  //  - prune more:
+  //    - use more aggressive filtering
+  //    - sort the formulae so that the most profitable solutions are found
+  //      first
+  //    - sort the uses too
+  //  - search faster:
+  //    - don't compute a cost and then compare; compare while computing the
+  //      cost and bail out early
+  //    - track register sets with SmallBitVector
+
+  const LSRUse &LU = Uses[Workspace.size()];
+
+  // If this use references any register that's already a part of the
+  // in-progress solution, consider it a requirement that a formula must
+  // reference that register in order to be considered. This prunes out
+  // unprofitable searching.
+  SmallSetVector<const SCEV *, 4> ReqRegs;
+  for (SmallPtrSet<const SCEV *, 16>::const_iterator I = CurRegs.begin(),
+       E = CurRegs.end(); I != E; ++I)
+    if (LU.Regs.count(*I))
+      ReqRegs.insert(*I);
+
+  bool AnySatisfiedReqRegs = false;
+  SmallPtrSet<const SCEV *, 16> NewRegs;
+  Cost NewCost;
+retry:
+  for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
+       E = LU.Formulae.end(); I != E; ++I) {
+    const Formula &F = *I;
+
+    // Ignore formulae which do not use any of the required registers.
+    for (SmallSetVector<const SCEV *, 4>::const_iterator J = ReqRegs.begin(),
+         JE = ReqRegs.end(); J != JE; ++J) {
+      const SCEV *Reg = *J;
+      if ((!F.ScaledReg || F.ScaledReg != Reg) &&
+          std::find(F.BaseRegs.begin(), F.BaseRegs.end(), Reg) ==
+          F.BaseRegs.end())
+        goto skip;
+    }
+    AnySatisfiedReqRegs = true;
+
+    // Evaluate the cost of the current formula. If it's already worse than
+    // the current best, prune the search at that point.
+    NewCost = CurCost;
+    NewRegs = CurRegs;
+    NewCost.RateFormula(F, NewRegs, VisitedRegs, L, LU.Offsets, SE, DT);
+    if (NewCost < SolutionCost) {
+      Workspace.push_back(&F);
+      if (Workspace.size() != Uses.size()) {
+        SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
+                     NewRegs, VisitedRegs);
+        if (F.getNumRegs() == 1 && Workspace.size() == 1)
+          VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);
+      } else {
+        DEBUG(dbgs() << "New best at "; NewCost.print(dbgs());
+              dbgs() << ". Regs:";
+              for (SmallPtrSet<const SCEV *, 16>::const_iterator
+                   I = NewRegs.begin(), E = NewRegs.end(); I != E; ++I)
+                dbgs() << ' ' << **I;
+              dbgs() << '\n');
+
+        SolutionCost = NewCost;
+        Solution = Workspace;
+      }
+      Workspace.pop_back();
+    }
+  skip:;
+  }
+
+  // If none of the formulae had all of the required registers, relax the
+  // constraint so that we don't exclude all formulae.
+  if (!AnySatisfiedReqRegs) {
+    ReqRegs.clear();
+    goto retry;
+  }
+}
+
+void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
+  SmallVector<const Formula *, 8> Workspace;
+  Cost SolutionCost;
+  SolutionCost.Loose();
+  Cost CurCost;
+  SmallPtrSet<const SCEV *, 16> CurRegs;
+  DenseSet<const SCEV *> VisitedRegs;
+  Workspace.reserve(Uses.size());
+
+  SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
+               CurRegs, VisitedRegs);
+
+  // Ok, we've now made all our decisions.
+  DEBUG(dbgs() << "\n"
+                  "The chosen solution requires "; SolutionCost.print(dbgs());
+        dbgs() << ":\n";
+        for (size_t i = 0, e = Uses.size(); i != e; ++i) {
+          dbgs() << "  ";
+          Uses[i].print(dbgs());
+          dbgs() << "\n"
+                    "    ";
+          Solution[i]->print(dbgs());
+          dbgs() << '\n';
+        });
+}
+
+/// getImmediateDominator - A handy utility for the specific DominatorTree
+/// query that we need here.
+///
+static BasicBlock *getImmediateDominator(BasicBlock *BB, DominatorTree &DT) {
+  DomTreeNode *Node = DT.getNode(BB);
+  if (!Node) return 0;
+  Node = Node->getIDom();
+  if (!Node) return 0;
+  return Node->getBlock();
+}
+
+Value *LSRInstance::Expand(const LSRFixup &LF,
+                           const Formula &F,
+                           BasicBlock::iterator IP,
+                           Loop *L, Instruction *IVIncInsertPos,
+                           SCEVExpander &Rewriter,
+                           SmallVectorImpl<WeakVH> &DeadInsts,
+                           ScalarEvolution &SE, DominatorTree &DT) const {
+  const LSRUse &LU = Uses[LF.LUIdx];
+
+  // Then, collect some instructions which we will remain dominated by when
+  // expanding the replacement. These must be dominated by any operands that
+  // will be required in the expansion.
+  SmallVector<Instruction *, 4> Inputs;
+  if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace))
+    Inputs.push_back(I);
+  if (LU.Kind == LSRUse::ICmpZero)
+    if (Instruction *I =
+          dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
+      Inputs.push_back(I);
+  if (LF.PostIncLoop && !L->contains(LF.UserInst))
+    Inputs.push_back(L->getLoopLatch()->getTerminator());
+
+  // Then, climb up the immediate dominator tree as far as we can go while
+  // still being dominated by the input positions.
+  for (;;) {
+    bool AllDominate = true;
+    Instruction *BetterPos = 0;
+    BasicBlock *IDom = getImmediateDominator(IP->getParent(), DT);
+    if (!IDom) break;
+    Instruction *Tentative = IDom->getTerminator();
+    for (SmallVectorImpl<Instruction *>::const_iterator I = Inputs.begin(),
+         E = Inputs.end(); I != E; ++I) {
+      Instruction *Inst = *I;
+      if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
+        AllDominate = false;
         break;
       }
+      if (IDom == Inst->getParent() &&
+          (!BetterPos || DT.dominates(BetterPos, Inst)))
+        BetterPos = next(BasicBlock::iterator(Inst));
+    }
+    if (!AllDominate)
+      break;
+    if (BetterPos)
+      IP = BetterPos;
+    else
+      IP = Tentative;
   }
+  while (isa<PHINode>(IP)) ++IP;
+
+  // Inform the Rewriter if we have a post-increment use, so that it can
+  // perform an advantageous expansion.
+  Rewriter.setPostInc(LF.PostIncLoop);
+
+  // This is the type that the user actually needs.
+  const Type *OpTy = LF.OperandValToReplace->getType();
+  // This will be the type that we'll initially expand to.
+  const Type *Ty = F.getType();
+  if (!Ty)
+    // No type known; just expand directly to the ultimate type.
+    Ty = OpTy;
+  else if (SE.getEffectiveSCEVType(Ty) == SE.getEffectiveSCEVType(OpTy))
+    // Expand directly to the ultimate type if it's the right size.
+    Ty = OpTy;
+  // This is the type to do integer arithmetic in.
+  const Type *IntTy = SE.getEffectiveSCEVType(Ty);
+
+  // Build up a list of operands to add together to form the full base.
+  SmallVector<const SCEV *, 8> Ops;
+
+  // Expand the BaseRegs portion.
+  for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
+       E = F.BaseRegs.end(); I != E; ++I) {
+    const SCEV *Reg = *I;
+    assert(!Reg->isZero() && "Zero allocated in a base register!");
+
+    // If we're expanding for a post-inc user for the add-rec's loop, make the
+    // post-inc adjustment.
+    const SCEV *Start = Reg;
+    while (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Start)) {
+      if (AR->getLoop() == LF.PostIncLoop) {
+        Reg = SE.getAddExpr(Reg, AR->getStepRecurrence(SE));
+        // If the user is inside the loop, insert the code after the increment
+        // so that it is dominated by its operand.
+        if (L->contains(LF.UserInst))
+          IP = IVIncInsertPos;
+        break;
+      }
+      Start = AR->getStart();
+    }
-  if (!Found)
-    NewStride = SE->getIntegerSCEV(-SInt, Stride->getType());
-  IU->AddUser(NewStride, CondUse->getOffset(), Cond, Cond->getOperand(0));
-  IU->IVUsesByStride[Stride]->removeUser(CondUse);
+    Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP)));
+  }
-  CondUse = &IU->IVUsesByStride[NewStride]->Users.back();
-  Stride = NewStride;
+  // Expand the ScaledReg portion.
+  Value *ICmpScaledV = 0;
+  if (F.AM.Scale != 0) {
+    const SCEV *ScaledS = F.ScaledReg;
+
+    // If we're expanding for a post-inc user for the add-rec's loop, make the
+    // post-inc adjustment.
+    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ScaledS))
+      if (AR->getLoop() == LF.PostIncLoop)
+        ScaledS = SE.getAddExpr(ScaledS, AR->getStepRecurrence(SE));
+
+    if (LU.Kind == LSRUse::ICmpZero) {
+      // An interesting way of "folding" with an icmp is to use a negated
+      // scale, which we'll implement by inserting it into the other operand
+      // of the icmp.
+      assert(F.AM.Scale == -1 &&
+             "The only scale supported by ICmpZero uses is -1!");
+      ICmpScaledV = Rewriter.expandCodeFor(ScaledS, 0, IP);
+    } else {
+      // Otherwise just expand the scaled register and an explicit scale,
+      // which is expected to be matched as part of the address.
+      ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, 0, IP));
+      ScaledS = SE.getMulExpr(ScaledS,
+                              SE.getIntegerSCEV(F.AM.Scale,
+                                                ScaledS->getType()));
+      Ops.push_back(ScaledS);
+    }
+  }
-  ++NumCountZero;
+  // Expand the immediate portions.
+  if (F.AM.BaseGV)
+    Ops.push_back(SE.getSCEV(F.AM.BaseGV));
+  int64_t Offset = (uint64_t)F.AM.BaseOffs + LF.Offset;
+  if (Offset != 0) {
+    if (LU.Kind == LSRUse::ICmpZero) {
+      // The other interesting way of "folding" with an ICmpZero is to use a
+      // negated immediate.
+      if (!ICmpScaledV)
+        ICmpScaledV = ConstantInt::get(IntTy, -Offset);
+      else {
+        Ops.push_back(SE.getUnknown(ICmpScaledV));
+        ICmpScaledV = ConstantInt::get(IntTy, Offset);
+      }
+    } else {
+      // Just add the immediate values. These again are expected to be matched
+      // as part of the address.
+      Ops.push_back(SE.getIntegerSCEV(Offset, IntTy));
+    }
+  }
-  return true;
+  // Emit instructions summing all the operands.
+  const SCEV *FullS = Ops.empty() ?
+                      SE.getIntegerSCEV(0, IntTy) :
+                      SE.getAddExpr(Ops);
+  Value *FullV = Rewriter.expandCodeFor(FullS, Ty, IP);
+
+  // We're done expanding now, so reset the rewriter.
+  Rewriter.setPostInc(0);
+
+  // An ICmpZero Formula represents an ICmp which we're handling as a
+  // comparison against zero. Now that we've expanded an expression for that
+  // form, update the ICmp's other operand.
+  if (LU.Kind == LSRUse::ICmpZero) {
+    ICmpInst *CI = cast<ICmpInst>(LF.UserInst);
+    DeadInsts.push_back(CI->getOperand(1));
+    assert(!F.AM.BaseGV && "ICmp does not support folding a global value and "
+                           "a scale at the same time!");
+    if (F.AM.Scale == -1) {
+      if (ICmpScaledV->getType() != OpTy) {
+        Instruction *Cast =
+          CastInst::Create(CastInst::getCastOpcode(ICmpScaledV, false,
+                                                   OpTy, false),
+                           ICmpScaledV, OpTy, "tmp", CI);
+        ICmpScaledV = Cast;
+      }
+      CI->setOperand(1, ICmpScaledV);
+    } else {
+      assert(F.AM.Scale == 0 &&
+             "ICmp does not support folding a global value and "
+             "a scale at the same time!");
+      Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy),
+                                           -(uint64_t)Offset);
+      if (C->getType() != OpTy)
+        C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
+                                                          OpTy, false),
+                                  C, OpTy);
+
+      CI->setOperand(1, C);
+    }
+  }
+
+  return FullV;
 }
-/// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for deciding
-/// when to exit the loop is used only for that purpose, try to rearrange things
-/// so it counts down to a test against zero.
-bool LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) {
-  bool ThisChanged = false;
-  for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
-    const SCEV *Stride = IU->StrideOrder[i];
-    std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
-      IU->IVUsesByStride.find(Stride);
-    assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
-    // FIXME: Generalize to non-affine IV's.
-    if (!SI->first->isLoopInvariant(L))
-      continue;
-    // If stride is a constant and it has an icmpinst use, check if we can
-    // optimize the loop to count down.
-    if (isa<SCEVConstant>(Stride) && SI->second->Users.size() == 1) {
-      Instruction *User = SI->second->Users.begin()->getUser();
-      if (!isa<ICmpInst>(User))
-        continue;
-      const SCEV *CondStride = Stride;
-      IVStrideUse *Use = &*SI->second->Users.begin();
-      if (!OptimizeLoopCountIVOfStride(CondStride, Use, L))
-        continue;
-      ThisChanged = true;
+/// Rewrite - Emit instructions for the leading candidate expression for this
+/// LSRUse (this is called "expanding"), and update the UserInst to reference
+/// the newly expanded value.
+void LSRInstance::Rewrite(const LSRFixup &LF,
+                          const Formula &F,
+                          Loop *L, Instruction *IVIncInsertPos,
+                          SCEVExpander &Rewriter,
+                          SmallVectorImpl<WeakVH> &DeadInsts,
+                          ScalarEvolution &SE, DominatorTree &DT,
+                          Pass *P) const {
+  const Type *OpTy = LF.OperandValToReplace->getType();
+
+  // First, find an insertion point that dominates UserInst. For PHI nodes,
+  // find the nearest block which dominates all the relevant uses.
+  if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
+    DenseMap<BasicBlock *, Value *> Inserted;
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+      if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
+        BasicBlock *BB = PN->getIncomingBlock(i);
-      // Now check if it's possible to reuse this iv for other stride uses.
-      for (unsigned j = 0, ee = IU->StrideOrder.size(); j != ee; ++j) {
-        const SCEV *SStride = IU->StrideOrder[j];
-        if (SStride == CondStride)
-          continue;
-        std::map<const SCEV *, IVUsersOfOneStride *>::iterator SII =
-          IU->IVUsesByStride.find(SStride);
-        assert(SII != IU->IVUsesByStride.end() && "Stride doesn't exist!");
-        // FIXME: Generalize to non-affine IV's.
-        if (!SII->first->isLoopInvariant(L))
-          continue;
-        // FIXME: Rewrite other stride using CondStride.
+        // If this is a critical edge, split the edge so that we do not insert
+        // the code on all predecessor/successor paths.  We do this unless this
+        // is the canonical backedge for this loop, which complicates post-inc
+        // users.
+        if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 &&
+            !isa<IndirectBrInst>(BB->getTerminator()) &&
+            (PN->getParent() != L->getHeader() || !L->contains(BB))) {
+          // Split the critical edge.
+          BasicBlock *NewBB = SplitCriticalEdge(BB, PN->getParent(), P);
+
+          // If PN is outside of the loop and BB is in the loop, we want to
+          // move the block to be immediately before the PHI block, not
+          // immediately after BB.
+          if (L->contains(BB) && !L->contains(PN))
+            NewBB->moveBefore(PN->getParent());
+
+          // Splitting the edge can reduce the number of PHI entries we have.
+          e = PN->getNumIncomingValues();
+          BB = NewBB;
+          i = PN->getBasicBlockIndex(BB);
+        }
+
+        std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
+          Inserted.insert(std::make_pair(BB, static_cast<Value *>(0)));
+        if (!Pair.second)
+          PN->setIncomingValue(i, Pair.first->second);
+        else {
+          Value *FullV = Expand(LF, F, BB->getTerminator(), L, IVIncInsertPos,
+                                Rewriter, DeadInsts, SE, DT);
+
+          // If this is reuse-by-noop-cast, insert the noop cast.
+          if (FullV->getType() != OpTy)
+            FullV =
+              CastInst::Create(CastInst::getCastOpcode(FullV, false,
+                                                       OpTy, false),
+                               FullV, LF.OperandValToReplace->getType(),
+                               "tmp", BB->getTerminator());
+
+          PN->setIncomingValue(i, FullV);
+          Pair.first->second = FullV;
+        }
       }
+  } else {
+    Value *FullV = Expand(LF, F, LF.UserInst, L, IVIncInsertPos,
+                          Rewriter, DeadInsts, SE, DT);
+
+    // If this is reuse-by-noop-cast, insert the noop cast.
+    if (FullV->getType() != OpTy) {
+      Instruction *Cast =
+        CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false),
+                         FullV, OpTy, "tmp", LF.UserInst);
+      FullV = Cast;
     }
+
+    // Update the user. ICmpZero is handled specially here (for now) because
+    // Expand may have updated one of the operands of the icmp already, and
+    // its new value may happen to be equal to LF.OperandValToReplace, in
+    // which case doing replaceUsesOfWith leads to replacing both operands
+    // with the same value. TODO: Reorganize this.
+    if (Uses[LF.LUIdx].Kind == LSRUse::ICmpZero)
+      LF.UserInst->setOperand(0, FullV);
+    else
+      LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV);
   }
-  Changed |= ThisChanged;
-  return ThisChanged;
+  DeadInsts.push_back(LF.OperandValToReplace);
 }
-bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
-  IU = &getAnalysis<IVUsers>();
-  SE = &getAnalysis<ScalarEvolution>();
-  Changed = false;
+void
+LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
+                               Pass *P) {
+  // Keep track of instructions we may have made dead, so that
+  // we can remove them after we are done working.
+  SmallVector<WeakVH, 16> DeadInsts;
+
+  SCEVExpander Rewriter(SE);
+  Rewriter.disableCanonicalMode();
+  Rewriter.setIVIncInsertPos(L, IVIncInsertPos);
-  // If LoopSimplify form is not available, stay out of trouble.
-  if (!L->getLoopPreheader() || !L->getLoopLatch())
-    return false;
+  // Expand the new value definitions and update the users.
+  for (size_t i = 0, e = Fixups.size(); i != e; ++i) {
+    size_t LUIdx = Fixups[i].LUIdx;
+
+    Rewrite(Fixups[i], *Solution[LUIdx], L, IVIncInsertPos, Rewriter,
+            DeadInsts, SE, DT, P);
+
+    Changed = true;
+  }
-  if (!IU->IVUsesByStride.empty()) {
-    DEBUG(dbgs() << "\nLSR on \"" << L->getHeader()->getParent()->getName()
-          << "\" ";
-          L->print(dbgs()));
+  // Clean up after ourselves. This must be done before deleting any
+  // instructions.
+  Rewriter.clear();
-    // Sort the StrideOrder so we process larger strides first.
-    std::stable_sort(IU->StrideOrder.begin(), IU->StrideOrder.end(),
-                     StrideCompare(SE));
+  Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
+}
-    // Optimize induction variables. Some indvar uses can be transformed to use
-    // strides that will be needed for other purposes. A common example of this
-    // is the exit test for the loop, which can often be rewritten to use the
-    // computation of some other indvar to decide when to terminate the loop.
-    OptimizeIndvars(L);
+LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
+  : IU(P->getAnalysis<IVUsers>()),
+    SE(P->getAnalysis<ScalarEvolution>()),
+    DT(P->getAnalysis<DominatorTree>()),
+    TLI(tli), L(l), Changed(false), IVIncInsertPos(0) {
-    // Change loop terminating condition to use the postinc iv when possible
-    // and optimize loop terminating compare. FIXME: Move this after
-    // StrengthReduceIVUsersOfStride?
-    OptimizeLoopTermCond(L);
+  // If LoopSimplify form is not available, stay out of trouble.
+  if (!L->isLoopSimplifyForm()) return;
+
+  // If there's no interesting work to be done, bail early.
+  if (IU.empty()) return;
+
+  DEBUG(dbgs() << "\nLSR on loop ";
+        WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false);
+        dbgs() << ":\n");
+
+  /// OptimizeShadowIV - If IV is used in an int-to-float cast
+  /// inside the loop then try to eliminate the cast operation.
+  OptimizeShadowIV();
+
+  // Change loop terminating condition to use the postinc iv when possible.
+  Changed |= OptimizeLoopTermCond();
+
+  CollectInterestingTypesAndFactors();
+  CollectFixupsAndInitialFormulae();
+  CollectLoopInvariantFixupsAndFormulae();
+
+  DEBUG(dbgs() << "LSR found " << Uses.size() << " uses:\n";
+        print_uses(dbgs()));
+
+  // Now use the reuse data to generate a bunch of interesting ways
+  // to formulate the values needed for the uses.
+  GenerateAllReuseFormulae();
+
+  DEBUG(dbgs() << "\n"
+                  "After generating reuse formulae:\n";
+        print_uses(dbgs()));
+
+  FilterOutUndesirableDedicatedRegisters();
+  NarrowSearchSpaceUsingHeuristics();
+
+  SmallVector<const Formula *, 8> Solution;
+  Solve(Solution);
+  assert(Solution.size() == Uses.size() && "Malformed solution!");
+
+  // Release memory that is no longer needed.
+  Factors.clear();
+  Types.clear();
+  RegUses.clear();
+
+#ifndef NDEBUG
+  // Formulae should be legal.
+  for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
+       E = Uses.end(); I != E; ++I) {
+    const LSRUse &LU = *I;
+    for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
+         JE = LU.Formulae.end(); J != JE; ++J)
+      assert(isLegalUse(J->AM, LU.MinOffset, LU.MaxOffset,
+                        LU.Kind, LU.AccessTy, TLI) &&
+             "Illegal formula generated!");
+  };
+#endif
-    // FIXME: We can shrink overlarge IV's here. e.g. if the code has
-    // computation in i64 values and the target doesn't support i64, demote
-    // the computation to 32-bit if safe.
+  // Now that we've decided what we want, make it so.
+  ImplementSolution(Solution, P);
+}
-    // FIXME: Attempt to reuse values across multiple IV's. In particular, we
-    // could have something like "for(i) { foo(i*8); bar(i*16) }", which should
-    // be codegened as "for (j = 0;; j+=8) { foo(j); bar(j+j); }" on X86/PPC.
-    // Need to be careful that IV's are all the same type. Only works for
-    // intptr_t indvars.
+void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
+  if (Factors.empty() && Types.empty()) return;
-    // IVsByStride keeps IVs for one particular loop.
-    assert(IVsByStride.empty() && "Stale entries in IVsByStride?");
+  OS << "LSR has identified the following interesting factors and types: ";
+  bool First = true;
-    StrengthReduceIVUsers(L);
+  for (SmallSetVector<int64_t, 8>::const_iterator
+       I = Factors.begin(), E = Factors.end(); I != E; ++I) {
+    if (!First) OS << ", ";
+    First = false;
+    OS << '*' << *I;
+  }
-    // After all sharing is done, see if we can adjust the loop to test against
-    // zero instead of counting up to a maximum. This is usually faster.
-    OptimizeLoopCountIV(L);
+  for (SmallSetVector<const Type *, 4>::const_iterator
+       I = Types.begin(), E = Types.end(); I != E; ++I) {
+    if (!First) OS << ", ";
+    First = false;
+    OS << '(' << **I << ')';
+  }
+  OS << '\n';
+}
-    // We're done analyzing this loop; release all the state we built up for it.
-    IVsByStride.clear();
+void LSRInstance::print_fixups(raw_ostream &OS) const {
+  OS << "LSR is examining the following fixup sites:\n";
+  for (SmallVectorImpl<LSRFixup>::const_iterator I = Fixups.begin(),
+       E = Fixups.end(); I != E; ++I) {
+    const LSRFixup &LF = *I;
+    OS << "  ";
+    LF.print(OS);
+    OS << '\n';
+  }
+}
-    // Clean up after ourselves
-    DeleteTriviallyDeadInstructions();
+void LSRInstance::print_uses(raw_ostream &OS) const {
+  OS << "LSR is examining the following uses:\n";
+  for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
+       E = Uses.end(); I != E; ++I) {
+    const LSRUse &LU = *I;
+    OS << "  ";
+    LU.print(OS);
+    OS << '\n';
+    for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
+         JE = LU.Formulae.end(); J != JE; ++J) {
+      OS << "    ";
+      J->print(OS);
+      OS << '\n';
+    }
   }
+}
+
+void LSRInstance::print(raw_ostream &OS) const {
+  print_factors_and_types(OS);
+  print_fixups(OS);
+  print_uses(OS);
+}
+
+void LSRInstance::dump() const {
+  print(errs()); errs() << '\n';
+}
+
+namespace {
+
+class LoopStrengthReduce : public LoopPass {
+  /// TLI - Keep a pointer of a TargetLowering to consult for determining
+  /// transformation profitability.
+  const TargetLowering *const TLI;
+
+public:
+  static char ID; // Pass ID, replacement for typeid
+  explicit LoopStrengthReduce(const TargetLowering *tli = 0);
+
+private:
+  bool runOnLoop(Loop *L, LPPassManager &LPM);
+  void getAnalysisUsage(AnalysisUsage &AU) const;
+};
+
+}
+
+char LoopStrengthReduce::ID = 0;
+static RegisterPass<LoopStrengthReduce>
+X("loop-reduce", "Loop Strength Reduction");
+
+Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
+  return new LoopStrengthReduce(TLI);
+}
+
+LoopStrengthReduce::LoopStrengthReduce(const TargetLowering *tli)
+  : LoopPass(&ID), TLI(tli) {}
+
+void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
+  // We split critical edges, so we change the CFG. However, we do update
+  // many analyses if they are around.
+  AU.addPreservedID(LoopSimplifyID);
+  AU.addPreserved<LoopInfo>();
+  AU.addPreserved("domfrontier");
+
+  AU.addRequiredID(LoopSimplifyID);
+  AU.addRequired<DominatorTree>();
+  AU.addPreserved<DominatorTree>();
+  AU.addRequired<ScalarEvolution>();
+  AU.addPreserved<ScalarEvolution>();
+  AU.addRequired<IVUsers>();
+  AU.addPreserved<IVUsers>();
+}
+
+bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
+  bool Changed = false;
+
+  // Run the main LSR transformation.
+  Changed |= LSRInstance(TLI, L, this).getChanged();
 
   // At this point, it is worth checking to see if any recurrence PHIs are also
   // dead, so that we can remove them as well.
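
[Editor's note] The SolveRecurse/Solve pair above implements a branch-and-bound search: pick one formula per use, carry the set of registers the partial solution already requires, and abandon any branch whose cost can no longer beat the best complete solution found so far. The following is a small, self-contained C++ sketch of that idea only. The type names (a Formula reduced to a bare register list) and the cost function (count of distinct registers) are simplified stand-ins assumed for illustration, not the LLVM API, and the sketch omits LSR's ReqRegs filtering and its multi-field Cost model.

// Hedged sketch only: a toy analogue of SolveRecurse/Solve, not LLVM code.
#include <iostream>
#include <set>
#include <vector>

typedef std::vector<int> Formula;        // registers referenced by a formula
typedef std::vector<Formula> Use;        // candidate formulae for one use

static void SolveRecurse(const std::vector<Use> &Uses, size_t Idx,
                         std::set<int> &CurRegs,
                         std::vector<size_t> &Workspace,
                         size_t &BestCost, std::vector<size_t> &Solution) {
  if (Idx == Uses.size()) {
    // Complete assignment; by construction it beats the previous best,
    // because every branch reaching here passed the pruning test below.
    BestCost = CurRegs.size();
    Solution = Workspace;
    return;
  }
  for (size_t F = 0; F != Uses[Idx].size(); ++F) {
    std::set<int> NewRegs(CurRegs);
    NewRegs.insert(Uses[Idx][F].begin(), Uses[Idx][F].end());
    // Prune: this partial solution already uses at least as many registers
    // as the best complete solution, so it cannot improve on it.
    if (NewRegs.size() >= BestCost)
      continue;
    Workspace.push_back(F);
    SolveRecurse(Uses, Idx + 1, NewRegs, Workspace, BestCost, Solution);
    Workspace.pop_back();
  }
}

int main() {
  // Two uses with two candidate formulae each; sharing register 1 is cheapest.
  std::vector<Use> Uses(2);
  Uses[0].push_back(Formula(1, 1));      // use 0, formula 0: {reg 1}
  Uses[0].push_back(Formula(1, 2));      // use 0, formula 1: {reg 2}
  Uses[1].push_back(Formula(1, 1));      // use 1, formula 0: {reg 1}
  Uses[1].push_back(Formula(1, 3));      // use 1, formula 1: {reg 3}

  std::set<int> CurRegs;
  std::vector<size_t> Workspace, Solution;
  size_t BestCost = Uses.size() * 8 + 1; // loose upper bound, like Cost::Loose()
  SolveRecurse(Uses, 0, CurRegs, Workspace, BestCost, Solution);

  std::cout << "register cost " << BestCost << ", formulae";
  for (size_t i = 0; i != Solution.size(); ++i)
    std::cout << ' ' << Solution[i];
  std::cout << '\n';                     // expect: register cost 1, formulae 0 0
  return 0;
}

In the real pass, the pruning comparison is NewCost < SolutionCost over a richer Cost, and formulae that reference none of the registers already required by the in-progress solution are skipped before costing; together with NarrowSearchSpaceUsingHeuristics, that is what keeps the worst case tractable.
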
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index ee8cb4f..a355ec3 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -76,11 +76,12 @@ static RegisterPass<LoopUnroll> X("loop-unroll", "Unroll loops"); Pass *llvm::createLoopUnrollPass() { return new LoopUnroll(); } /// ApproximateLoopSize - Approximate the size of the loop. -static unsigned ApproximateLoopSize(const Loop *L) { +static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls) { CodeMetrics Metrics; for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) Metrics.analyzeBasicBlock(*I); + NumCalls = Metrics.NumCalls; return Metrics.NumInsts; } @@ -110,8 +111,13 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { // Enforce the threshold. if (UnrollThreshold != NoThreshold) { - unsigned LoopSize = ApproximateLoopSize(L); + unsigned NumCalls; + unsigned LoopSize = ApproximateLoopSize(L, NumCalls); DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n"); + if (NumCalls != 0) { + DEBUG(dbgs() << " Not unrolling loop with function calls.\n"); + return false; + } uint64_t Size = (uint64_t)LoopSize*Count; if (TripCount != 1 && Size > UnrollThreshold) { DEBUG(dbgs() << " Too large to fully unroll with count: " << Count diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index 527a7b5..990e0c4 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -169,6 +169,10 @@ Pass *llvm::createLoopUnswitchPass(bool Os) { /// invariant in the loop, or has an invariant piece, return the invariant. /// Otherwise, return null. static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) { + // We can never unswitch on vector conditions. + if (isa<VectorType>(Cond->getType())) + return 0; + // Constants should be folded, not unswitched on! if (isa<Constant>(Cond)) return 0; @@ -401,7 +405,7 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val, /// UnswitchIfProfitable - We have found that we can unswitch currentLoop when /// LoopCond == Val to simplify the loop. If we decide that this is profitable, /// unswitch the loop, reprocess the pieces, then return true. -bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val){ +bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) { initLoopData(); @@ -867,7 +871,7 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, // If we know that LIC == Val, or that LIC == NotVal, just replace uses of LIC // in the loop with the appropriate one directly. 
if (IsEqual || (isa<ConstantInt>(Val) && - Val->getType()->isInteger(1))) { + Val->getType()->isIntegerTy(1))) { Value *Replacement; if (IsEqual) Replacement = Val; @@ -993,10 +997,10 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { case Instruction::And: if (isa<ConstantInt>(I->getOperand(0)) && // constant -> RHS - I->getOperand(0)->getType()->isInteger(1)) + I->getOperand(0)->getType()->isIntegerTy(1)) cast<BinaryOperator>(I)->swapOperands(); if (ConstantInt *CB = dyn_cast<ConstantInt>(I->getOperand(1))) - if (CB->getType()->isInteger(1)) { + if (CB->getType()->isIntegerTy(1)) { if (CB->isOne()) // X & 1 -> X ReplaceUsesOfWith(I, I->getOperand(0), Worklist, L, LPM); else // X & 0 -> 0 @@ -1007,10 +1011,10 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { case Instruction::Or: if (isa<ConstantInt>(I->getOperand(0)) && // constant -> RHS - I->getOperand(0)->getType()->isInteger(1)) + I->getOperand(0)->getType()->isIntegerTy(1)) cast<BinaryOperator>(I)->swapOperands(); if (ConstantInt *CB = dyn_cast<ConstantInt>(I->getOperand(1))) - if (CB->getType()->isInteger(1)) { + if (CB->getType()->isIntegerTy(1)) { if (CB->isOne()) // X | 1 -> 1 ReplaceUsesOfWith(I, I->getOperand(1), Worklist, L, LPM); else // X | 0 -> X diff --git a/lib/Transforms/Scalar/Makefile b/lib/Transforms/Scalar/Makefile index e18f30f..cc42fd0 100644 --- a/lib/Transforms/Scalar/Makefile +++ b/lib/Transforms/Scalar/Makefile @@ -10,7 +10,6 @@ LEVEL = ../../.. LIBRARYNAME = LLVMScalarOpts BUILD_ARCHIVE = 1 -CXXFLAGS = -fno-rtti include $(LEVEL)/Makefile.common diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index e0aa491..62e2977 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -42,7 +42,7 @@ static Value *isBytewiseValue(Value *V) { LLVMContext &Context = V->getContext(); // All byte-wide stores are splatable, even of arbitrary variables. - if (V->getType()->isInteger(8)) return V; + if (V->getType()->isIntegerTy(8)) return V; // Constant float and double values can be handled as integer values if the // corresponding integer value is "byteable". An important case is 0.0. diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index 4a99f4a..187216a 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -182,7 +182,7 @@ unsigned Reassociate::getRank(Value *V) { // If this is a not or neg instruction, do not count it for rank. This // assures us that X and ~X will have the same rank. - if (!I->getType()->isInteger() || + if (!I->getType()->isIntegerTy() || (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I))) ++Rank; @@ -249,7 +249,7 @@ void Reassociate::LinearizeExpr(BinaryOperator *I) { /// LinearizeExprTree - Given an associative binary expression tree, traverse /// all of the uses putting it into canonical form. This forces a left-linear -/// form of the the expression (((a+b)+c)+d), and collects information about the +/// form of the expression (((a+b)+c)+d), and collects information about the /// rank of the non-tree operands. /// /// NOTE: These intentionally destroys the expression tree operands (turning @@ -299,7 +299,7 @@ void Reassociate::LinearizeExprTree(BinaryOperator *I, Success = false; MadeChange = true; } else if (RHSBO) { - // Turn (A+B)+(C+D) -> (((A+B)+C)+D). This guarantees the the RHS is not + // Turn (A+B)+(C+D) -> (((A+B)+C)+D). 
This guarantees the RHS is not // part of the expression tree. LinearizeExpr(I); LHS = LHSBO = cast<BinaryOperator>(I->getOperand(0)); @@ -929,10 +929,19 @@ void Reassociate::ReassociateBB(BasicBlock *BB) { } // Reject cases where it is pointless to do this. - if (!isa<BinaryOperator>(BI) || BI->getType()->isFloatingPoint() || + if (!isa<BinaryOperator>(BI) || BI->getType()->isFloatingPointTy() || isa<VectorType>(BI->getType())) continue; // Floating point ops are not associative. + // Do not reassociate boolean (i1) expressions. We want to preserve the + // original order of evaluation for short-circuited comparisons that + // SimplifyCFG has folded to AND/OR expressions. If the expression + // is not further optimized, it is likely to be transformed back to a + // short-circuited form for code gen, and the source order may have been + // optimized for the most likely conditions. + if (BI->getType()->isIntegerTy(1)) + continue; + // If this is a subtract instruction which is not already in negate form, // see if we can convert it to X+-Y. if (BI->getOpcode() == Instruction::Sub) { diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index f473480..822712e 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -202,12 +202,18 @@ bool SROA::performPromotion(Function &F) { return Changed; } -/// getNumSAElements - Return the number of elements in the specific struct or -/// array. -static uint64_t getNumSAElements(const Type *T) { +/// ShouldAttemptScalarRepl - Decide if an alloca is a good candidate for +/// SROA. It must be a struct or array type with a small number of elements. +static bool ShouldAttemptScalarRepl(AllocaInst *AI) { + const Type *T = AI->getAllocatedType(); + // Do not promote any struct into more than 32 separate vars. if (const StructType *ST = dyn_cast<StructType>(T)) - return ST->getNumElements(); - return cast<ArrayType>(T)->getNumElements(); + return ST->getNumElements() <= 32; + // Arrays are much less likely to be safe for SROA; only consider + // them if they are very small. + if (const ArrayType *AT = dyn_cast<ArrayType>(T)) + return AT->getNumElements() <= 8; + return false; } // performScalarRepl - This algorithm is a simple worklist driven algorithm, @@ -266,22 +272,18 @@ bool SROA::performScalarRepl(Function &F) { // Do not promote [0 x %struct]. if (AllocaSize == 0) continue; + // If the alloca looks like a good candidate for scalar replacement, and if + // all its users can be transformed, then split up the aggregate into its + // separate elements. + if (ShouldAttemptScalarRepl(AI) && isSafeAllocaToScalarRepl(AI)) { + DoScalarReplacement(AI, WorkList); + Changed = true; + continue; + } + // Do not promote any struct whose size is too big. if (AllocaSize > SRThreshold) continue; - if ((isa<StructType>(AI->getAllocatedType()) || - isa<ArrayType>(AI->getAllocatedType())) && - // Do not promote any struct into more than "32" separate vars. - getNumSAElements(AI->getAllocatedType()) <= SRThreshold/4) { - // Check that all of the users of the allocation are capable of being - // transformed. - if (isSafeAllocaToScalarRepl(AI)) { - DoScalarReplacement(AI, WorkList); - Changed = true; - continue; - } - } - // If we can turn this aggregate value (potentially with casts) into a // simple scalar value that can be mem2reg'd into a register value. 
// IsNotTrivial tracks whether this is something that mem2reg could have @@ -681,7 +683,7 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, Val->takeName(GEPI); } if (Val->getType() != GEPI->getType()) - Val = new BitCastInst(Val, GEPI->getType(), Val->getNameStr(), GEPI); + Val = new BitCastInst(Val, GEPI->getType(), Val->getName(), GEPI); GEPI->replaceAllUsesWith(Val); DeadInsts.push_back(GEPI); } @@ -769,7 +771,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, Value *Idx[2] = { Zero, ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) }; OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx, Idx + 2, - OtherPtr->getNameStr()+"."+Twine(i), + OtherPtr->getName()+"."+Twine(i), MI); uint64_t EltOffset; const PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType()); @@ -833,7 +835,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, StoreVal = ConstantInt::get(Context, TotalVal); if (isa<PointerType>(ValTy)) StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy); - else if (ValTy->isFloatingPoint()) + else if (ValTy->isFloatingPointTy()) StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy); assert(StoreVal->getType() == ValTy && "Type mismatch!"); @@ -853,12 +855,11 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, // Cast the element pointer to BytePtrTy. if (EltPtr->getType() != BytePtrTy) - EltPtr = new BitCastInst(EltPtr, BytePtrTy, EltPtr->getNameStr(), MI); + EltPtr = new BitCastInst(EltPtr, BytePtrTy, EltPtr->getName(), MI); // Cast the other pointer (if we have one) to BytePtrTy. if (OtherElt && OtherElt->getType() != BytePtrTy) - OtherElt = new BitCastInst(OtherElt, BytePtrTy,OtherElt->getNameStr(), - MI); + OtherElt = new BitCastInst(OtherElt, BytePtrTy, OtherElt->getName(), MI); unsigned EltSize = TD->getTypeAllocSize(EltTy); @@ -938,7 +939,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, Value *DestField = NewElts[i]; if (EltVal->getType() == FieldTy) { // Storing to an integer field of this size, just do it. - } else if (FieldTy->isFloatingPoint() || isa<VectorType>(FieldTy)) { + } else if (FieldTy->isFloatingPointTy() || isa<VectorType>(FieldTy)) { // Bitcast to the right element type (for fp/vector values). EltVal = new BitCastInst(EltVal, FieldTy, "", SI); } else { @@ -982,7 +983,8 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, Value *DestField = NewElts[i]; if (EltVal->getType() == ArrayEltTy) { // Storing to an integer field of this size, just do it. - } else if (ArrayEltTy->isFloatingPoint() || isa<VectorType>(ArrayEltTy)) { + } else if (ArrayEltTy->isFloatingPointTy() || + isa<VectorType>(ArrayEltTy)) { // Bitcast to the right element type (for fp/vector values). EltVal = new BitCastInst(EltVal, ArrayEltTy, "", SI); } else { @@ -1042,7 +1044,7 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, const IntegerType *FieldIntTy = IntegerType::get(LI->getContext(), FieldSizeBits); - if (!isa<IntegerType>(FieldTy) && !FieldTy->isFloatingPoint() && + if (!isa<IntegerType>(FieldTy) && !FieldTy->isFloatingPointTy() && !isa<VectorType>(FieldTy)) SrcField = new BitCastInst(SrcField, PointerType::getUnqual(FieldIntTy), @@ -1384,9 +1386,9 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) { // If the source and destination are both to the same alloca, then this is // a noop copy-to-self, just delete it. 
Otherwise, emit a load and store // as appropriate. - AllocaInst *OrigAI = cast<AllocaInst>(Ptr->getUnderlyingObject()); + AllocaInst *OrigAI = cast<AllocaInst>(Ptr->getUnderlyingObject(0)); - if (MTI->getSource()->getUnderlyingObject() != OrigAI) { + if (MTI->getSource()->getUnderlyingObject(0) != OrigAI) { // Dest must be OrigAI, change this to be a load from the original // pointer (bitcasted), then a store to our new alloca. assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?"); @@ -1396,7 +1398,7 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) { LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval"); SrcVal->setAlignment(MTI->getAlignment()); Builder.CreateStore(SrcVal, NewAI); - } else if (MTI->getDest()->getUnderlyingObject() != OrigAI) { + } else if (MTI->getDest()->getUnderlyingObject(0) != OrigAI) { // Src must be OrigAI, change this to be a load from NewAI then a store // through the original dest pointer (bitcasted). assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?"); @@ -1521,7 +1523,7 @@ Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, // If the result is an integer, this is a trunc or bitcast. if (isa<IntegerType>(ToType)) { // Should be done. - } else if (ToType->isFloatingPoint() || isa<VectorType>(ToType)) { + } else if (ToType->isFloatingPointTy() || isa<VectorType>(ToType)) { // Just do a bitcast, we know the sizes match up. FromVal = Builder.CreateBitCast(FromVal, ToType, "tmp"); } else { @@ -1599,7 +1601,7 @@ Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old, unsigned DestWidth = TD->getTypeSizeInBits(AllocaType); unsigned SrcStoreWidth = TD->getTypeStoreSizeInBits(SV->getType()); unsigned DestStoreWidth = TD->getTypeStoreSizeInBits(AllocaType); - if (SV->getType()->isFloatingPoint() || isa<VectorType>(SV->getType())) + if (SV->getType()->isFloatingPointTy() || isa<VectorType>(SV->getType())) SV = Builder.CreateBitCast(SV, IntegerType::get(SV->getContext(),SrcWidth), "tmp"); else if (isa<PointerType>(SV->getType())) diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index 43447de..62f34a2 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -30,6 +30,7 @@ #include "llvm/Attributes.h" #include "llvm/Support/CFG.h" #include "llvm/Pass.h" +#include "llvm/Target/TargetData.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" @@ -261,7 +262,7 @@ static bool MergeEmptyReturnBlocks(Function &F) { /// IterativeSimplifyCFG - Call SimplifyCFG on all the blocks in the function, /// iterating until no more changes are made. -static bool IterativeSimplifyCFG(Function &F) { +static bool IterativeSimplifyCFG(Function &F, const TargetData *TD) { bool Changed = false; bool LocalChange = true; while (LocalChange) { @@ -271,7 +272,7 @@ static bool IterativeSimplifyCFG(Function &F) { // if they are unneeded... // for (Function::iterator BBIt = ++F.begin(); BBIt != F.end(); ) { - if (SimplifyCFG(BBIt++)) { + if (SimplifyCFG(BBIt++, TD)) { LocalChange = true; ++NumSimpl; } @@ -285,10 +286,11 @@ static bool IterativeSimplifyCFG(Function &F) { // simplify the CFG. 
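The hunks above thread an optional TargetData through SimplifyCFG. A minimal sketch of that pattern, assuming a hypothetical FunctionPass subclass named MyCFGPass (the individual calls mirror the ones visible in the diff):

bool MyCFGPass::runOnFunction(Function &F) {
  // TargetData is optional: if no earlier pass registered it, TD is null
  // and SimplifyCFG simply skips the target-dependent folds.
  const TargetData *TD = getAnalysisIfAvailable<TargetData>();
  bool Changed = false;
  // Start at ++F.begin(): the entry block may not be simplified.
  for (Function::iterator BBIt = ++F.begin(); BBIt != F.end(); )
    Changed |= SimplifyCFG(BBIt++, TD);
  return Changed;
}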
// bool CFGSimplifyPass::runOnFunction(Function &F) { + const TargetData *TD = getAnalysisIfAvailable<TargetData>(); bool EverChanged = RemoveUnreachableBlocksFromFn(F); EverChanged |= MergeEmptyReturnBlocks(F); - EverChanged |= IterativeSimplifyCFG(F); - + EverChanged |= IterativeSimplifyCFG(F, TD); + // If neither pass changed anything, we're done. if (!EverChanged) return false; @@ -299,11 +301,11 @@ bool CFGSimplifyPass::runOnFunction(Function &F) { // RemoveUnreachableBlocksFromFn doesn't do anything. if (!RemoveUnreachableBlocksFromFn(F)) return true; - + do { - EverChanged = IterativeSimplifyCFG(F); + EverChanged = IterativeSimplifyCFG(F, TD); EverChanged |= RemoveUnreachableBlocksFromFn(F); } while (EverChanged); - + return true; } diff --git a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp index 5acd6aa..4464961 100644 --- a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp @@ -68,7 +68,7 @@ InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs, Function *Callee = Call->getCalledFunction(); // Minimally sanity-check the CFG of half_powr to ensure that it contains - // the the kind of code we expect. If we're running this pass, we have + // the kind of code we expect. If we're running this pass, we have // reason to believe it will be what we expect. Function::iterator I = Callee->begin(); BasicBlock *Prologue = I++; diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index a49da9c..54b4380 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -152,7 +152,7 @@ Value *LibCallOptimization::EmitStrLen(Value *Ptr, IRBuilder<> &B) { Constant *StrLen =M->getOrInsertFunction("strlen", AttrListPtr::get(AWI, 2), TD->getIntPtrType(*Context), - Type::getInt8PtrTy(*Context), + Type::getInt8PtrTy(*Context), NULL); CallInst *CI = B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen"); if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts())) @@ -232,10 +232,10 @@ Value *LibCallOptimization::EmitMemChr(Value *Ptr, Value *Val, AWI = AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind); Value *MemChr = M->getOrInsertFunction("memchr", AttrListPtr::get(&AWI, 1), - Type::getInt8PtrTy(*Context), - Type::getInt8PtrTy(*Context), + Type::getInt8PtrTy(*Context), + Type::getInt8PtrTy(*Context), Type::getInt32Ty(*Context), - TD->getIntPtrType(*Context), + TD->getIntPtrType(*Context), NULL); CallInst *CI = B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr"); @@ -321,9 +321,9 @@ Value *LibCallOptimization::EmitPutChar(Value *Char, IRBuilder<> &B) { Type::getInt32Ty(*Context), NULL); CallInst *CI = B.CreateCall(PutChar, B.CreateIntCast(Char, - Type::getInt32Ty(*Context), - /*isSigned*/true, - "chari"), + Type::getInt32Ty(*Context), + /*isSigned*/true, + "chari"), "putchar"); if (const Function *F = dyn_cast<Function>(PutChar->stripPointerCasts())) @@ -341,7 +341,7 @@ void LibCallOptimization::EmitPutS(Value *Str, IRBuilder<> &B) { Value *PutS = M->getOrInsertFunction("puts", AttrListPtr::get(AWI, 2), Type::getInt32Ty(*Context), - Type::getInt8PtrTy(*Context), + Type::getInt8PtrTy(*Context), NULL); CallInst *CI = B.CreateCall(PutS, CastToCStr(Str, B), "puts"); if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts())) @@ -359,13 +359,13 @@ void LibCallOptimization::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B) { Constant *F; if 
(isa<PointerType>(File->getType())) F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI, 2), - Type::getInt32Ty(*Context), + Type::getInt32Ty(*Context), Type::getInt32Ty(*Context), File->getType(), - NULL); + NULL); else F = M->getOrInsertFunction("fputc", - Type::getInt32Ty(*Context), - Type::getInt32Ty(*Context), + Type::getInt32Ty(*Context), + Type::getInt32Ty(*Context), File->getType(), NULL); Char = B.CreateIntCast(Char, Type::getInt32Ty(*Context), /*isSigned*/true, "chari"); @@ -386,7 +386,7 @@ void LibCallOptimization::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B) { Constant *F; if (isa<PointerType>(File->getType())) F = M->getOrInsertFunction("fputs", AttrListPtr::get(AWI, 3), - Type::getInt32Ty(*Context), + Type::getInt32Ty(*Context), Type::getInt8PtrTy(*Context), File->getType(), NULL); else @@ -414,13 +414,13 @@ void LibCallOptimization::EmitFWrite(Value *Ptr, Value *Size, Value *File, TD->getIntPtrType(*Context), Type::getInt8PtrTy(*Context), TD->getIntPtrType(*Context), - TD->getIntPtrType(*Context), + TD->getIntPtrType(*Context), File->getType(), NULL); else F = M->getOrInsertFunction("fwrite", TD->getIntPtrType(*Context), Type::getInt8PtrTy(*Context), TD->getIntPtrType(*Context), - TD->getIntPtrType(*Context), + TD->getIntPtrType(*Context), File->getType(), NULL); CallInst *CI = B.CreateCall4(F, CastToCStr(Ptr, B), Size, ConstantInt::get(TD->getIntPtrType(*Context), 1), File); @@ -525,7 +525,7 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) { // Must be a Constant Array ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit); - if (!Array || !Array->getType()->getElementType()->isInteger(8)) + if (!Array || !Array->getType()->getElementType()->isIntegerTy(8)) return false; // Get the number of elements in the array @@ -697,7 +697,7 @@ struct StrChrOpt : public LibCallOptimization { if (!TD) return 0; uint64_t Len = GetStringLength(SrcStr); - if (Len == 0 || !FT->getParamType(1)->isInteger(32)) // memchr needs i32. + if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32))// memchr needs i32. return 0; return EmitMemChr(SrcStr, CI->getOperand(2), // include nul. @@ -739,7 +739,7 @@ struct StrCmpOpt : public LibCallOptimization { // Verify the "strcmp" function prototype. const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || - !FT->getReturnType()->isInteger(32) || + !FT->getReturnType()->isIntegerTy(32) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != Type::getInt8PtrTy(*Context)) return 0; @@ -787,7 +787,7 @@ struct StrNCmpOpt : public LibCallOptimization { // Verify the "strncmp" function prototype. 
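For reference, the strchr-to-memchr rewrite done by StrChrOpt above can be stated in plain libc terms; the helper below is illustrative only. Scanning strlen(S)+1 bytes makes the NUL terminator visible, which is why the emitted length "includes nul":

#include <cstring>

// Equivalent of strchr(S, C) when the length of S is known to be N:
// searching N+1 bytes lets a search for '\0' succeed, and a missing
// character still yields a null result, exactly as strchr would.
const char *strchrViaMemchr(const char *S, int C, unsigned long N /* == strlen(S) */) {
  return static_cast<const char *>(memchr(S, C, N + 1));
}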
const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || - !FT->getReturnType()->isInteger(32) || + !FT->getReturnType()->isIntegerTy(32) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != Type::getInt8PtrTy(*Context) || !isa<IntegerType>(FT->getParamType(2))) @@ -1008,7 +1008,7 @@ struct MemCmpOpt : public LibCallOptimization { const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || !isa<PointerType>(FT->getParamType(0)) || !isa<PointerType>(FT->getParamType(1)) || - !FT->getReturnType()->isInteger(32)) + !FT->getReturnType()->isIntegerTy(32)) return 0; Value *LHS = CI->getOperand(1), *RHS = CI->getOperand(2); @@ -1203,22 +1203,23 @@ struct MemMoveChkOpt : public LibCallOptimization { struct StrCpyChkOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // These optimizations require TargetData. - if (!TD) return 0; - const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || !isa<PointerType>(FT->getParamType(0)) || - !isa<PointerType>(FT->getParamType(1)) || - !isa<IntegerType>(FT->getParamType(2))) + !isa<PointerType>(FT->getParamType(1))) return 0; ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(3)); if (!SizeCI) return 0; - // We don't have any length information, just lower to a plain strcpy. - if (SizeCI->isAllOnesValue()) + // If a) we don't have any length information, or b) we know this will + // fit then just lower to a plain strcpy. Otherwise we'll keep our + // strcpy_chk call which may fail at runtime if the size is too long. + // TODO: It might be nice to get a maximum length out of the possible + // string lengths for varying strings. + if (SizeCI->isAllOnesValue() || + SizeCI->getZExtValue() >= GetStringLength(CI->getOperand(2))) return EmitStrCpy(CI->getOperand(1), CI->getOperand(2), B); return 0; @@ -1240,7 +1241,7 @@ struct PowOpt : public LibCallOptimization { // result type. if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) || FT->getParamType(0) != FT->getParamType(1) || - !FT->getParamType(0)->isFloatingPoint()) + !FT->getParamType(0)->isFloatingPointTy()) return 0; Value *Op1 = CI->getOperand(1), *Op2 = CI->getOperand(2); @@ -1294,7 +1295,7 @@ struct Exp2Opt : public LibCallOptimization { // Just make sure this has 1 argument of FP type, which matches the // result type. if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || - !FT->getParamType(0)->isFloatingPoint()) + !FT->getParamType(0)->isFloatingPointTy()) return 0; Value *Op = CI->getOperand(1); @@ -1327,7 +1328,7 @@ struct Exp2Opt : public LibCallOptimization { Module *M = Caller->getParent(); Value *Callee = M->getOrInsertFunction(Name, Op->getType(), Op->getType(), - Type::getInt32Ty(*Context),NULL); + Type::getInt32Ty(*Context),NULL); CallInst *CI = B.CreateCall2(Callee, One, LdExpArg); if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) CI->setCallingConv(F->getCallingConv()); @@ -1374,7 +1375,7 @@ struct FFSOpt : public LibCallOptimization { // Just make sure this has 1 integer argument and an i32 // result type.
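The new StrCpyChkOpt logic above has a simple reading in libc terms. A sketch using glibc's fortified entry point for illustration; note that GetStringLength in the patch returns the string length including the terminating NUL, so the >= comparison is exactly a "does it fit" test:

#include <cstring>

char Buf[16];

void loweredCheckedCopy() {
  // __strcpy_chk(Buf, "short", sizeof Buf) copies 6 bytes (5 + NUL) into a
  // 16-byte object; the runtime check can never fire, so the optimizer may
  // lower the checked call to a plain strcpy:
  strcpy(Buf, "short");
  // When the object size is unknown the compiler passes (size_t)-1, the
  // isAllOnesValue() case above, which is likewise lowered to strcpy.
}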
if (FT->getNumParams() != 1 || - !FT->getReturnType()->isInteger(32) || + !FT->getReturnType()->isIntegerTy(32) || !isa<IntegerType>(FT->getParamType(0))) return 0; @@ -1410,7 +1411,7 @@ struct IsDigitOpt : public LibCallOptimization { const FunctionType *FT = Callee->getFunctionType(); // We require integer(i32) if (FT->getNumParams() != 1 || !isa<IntegerType>(FT->getReturnType()) || - !FT->getParamType(0)->isInteger(32)) + !FT->getParamType(0)->isIntegerTy(32)) return 0; // isdigit(c) -> (c-'0') <u 10 @@ -1431,7 +1432,7 @@ struct IsAsciiOpt : public LibCallOptimization { const FunctionType *FT = Callee->getFunctionType(); // We require integer(i32) if (FT->getNumParams() != 1 || !isa<IntegerType>(FT->getReturnType()) || - !FT->getParamType(0)->isInteger(32)) + !FT->getParamType(0)->isIntegerTy(32)) return 0; // isascii(c) -> c <u 128 @@ -1472,7 +1473,7 @@ struct ToAsciiOpt : public LibCallOptimization { const FunctionType *FT = Callee->getFunctionType(); // We require i32(i32) if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || - !FT->getParamType(0)->isInteger(32)) + !FT->getParamType(0)->isIntegerTy(32)) return 0; // toascii(c) -> c & 0x7f diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index 4119cb9..162d902 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -211,7 +211,8 @@ bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) { // FIXME: Writes to memory only matter if they may alias the pointer // being loaded from. if (CI->mayWriteToMemory() || - !isSafeToLoadUnconditionally(L->getPointerOperand(), L)) + !isSafeToLoadUnconditionally(L->getPointerOperand(), L, + L->getAlignment())) return false; } } diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp index 19c7206..3657390 100644 --- a/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -179,7 +179,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, // Create a new basic block, linking it into the CFG. BasicBlock *NewBB = BasicBlock::Create(TI->getContext(), TIBB->getName() + "." + DestBB->getName() + "_crit_edge"); - // Create our unconditional branch... + // Create our unconditional branch. BranchInst::Create(DestBB, NewBB); // Branch to the new block, breaking the edge. @@ -192,16 +192,47 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, // If there are any PHI nodes in DestBB, we need to update them so that they // merge incoming values from NewBB instead of from TIBB. - // - for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) { - PHINode *PN = cast<PHINode>(I); - // We no longer enter through TIBB, now we come in through NewBB. Revector - // exactly one entry in the PHI node that used to come from TIBB to come - // from NewBB. - int BBIdx = PN->getBasicBlockIndex(TIBB); - PN->setIncomingBlock(BBIdx, NewBB); + if (PHINode *APHI = dyn_cast<PHINode>(DestBB->begin())) { + // This conceptually does: + // foreach (PHINode *PN in DestBB) + // PN->setIncomingBlock(PN->getIncomingBlock(TIBB), NewBB); + // but is optimized for two cases. + + if (APHI->getNumIncomingValues() <= 8) { // Small # preds case. + unsigned BBIdx = 0; + for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) { + // We no longer enter through TIBB, now we come in through NewBB.
+ // Revector exactly one entry in the PHI node that used to come from + // TIBB to come from NewBB. + PHINode *PN = cast<PHINode>(I); + + // Reuse the previous value of BBIdx if it lines up. In cases where we + // have multiple phi nodes with *lots* of predecessors, this is a speed + // win because we don't have to scan the PHI looking for TIBB. This + // happens because the BB list of PHI nodes are usually in the same + // order. + if (PN->getIncomingBlock(BBIdx) != TIBB) + BBIdx = PN->getBasicBlockIndex(TIBB); + PN->setIncomingBlock(BBIdx, NewBB); + } + } else { + // However, the foreach loop is slow for blocks with lots of predecessors + // because PHINode::getIncomingBlock is O(n) in # preds. Instead, walk + // the user list of TIBB to find the PHI nodes. + SmallPtrSet<PHINode*, 16> UpdatedPHIs; + + for (Value::use_iterator UI = TIBB->use_begin(), E = TIBB->use_end(); + UI != E; ) { + Value::use_iterator Use = UI++; + if (PHINode *PN = dyn_cast<PHINode>(Use)) { + // Remove one entry from each PHI. + if (PN->getParent() == DestBB && UpdatedPHIs.insert(PN)) + PN->setOperand(Use.getOperandNo(), NewBB); + } + } + } } - + // If there are any other edges from TIBB to DestBB, update those to go // through the split block, making those edges non-critical as well (and // reducing the number of phi entries in the DestBB if relevant). @@ -221,6 +252,15 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, // If we don't have a pass object, we can't update anything... if (P == 0) return NewBB; + + DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>(); + DominanceFrontier *DF = P->getAnalysisIfAvailable<DominanceFrontier>(); + LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>(); + ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>(); + + // If we have nothing to update, just return. + if (DT == 0 && DF == 0 && LI == 0 && PI == 0) + return NewBB; // Now update analysis information. Since the only predecessor of NewBB is // the TIBB, TIBB clearly dominates NewBB. TIBB usually doesn't dominate @@ -229,14 +269,23 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, // loop header) then NewBB dominates DestBB. SmallVector<BasicBlock*, 8> OtherPreds; - for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB); I != E; ++I) - if (*I != NewBB) - OtherPreds.push_back(*I); + // If there is a PHI in the block, loop over predecessors with it, which is + // faster than iterating pred_begin/end. + if (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) { + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingBlock(i) != NewBB) + OtherPreds.push_back(PN->getIncomingBlock(i)); + } else { + for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB); + I != E; ++I) + if (*I != NewBB) + OtherPreds.push_back(*I); + } bool NewBBDominatesDestBB = true; // Should we update DominatorTree information? - if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) { + if (DT) { DomTreeNode *TINode = DT->getNode(TIBB); // The new block is not the immediate dominator for any other nodes, but @@ -267,7 +316,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, } // Should we update DominanceFrontier information? - if (DominanceFrontier *DF = P->getAnalysisIfAvailable<DominanceFrontier>()) { + if (DF) { // If NewBBDominatesDestBB hasn't been computed yet, do so with DF. if (!OtherPreds.empty()) { // FIXME: IMPLEMENT THIS! 
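The BBIdx-reuse trick above relies on consecutive PHI nodes usually listing their predecessors in the same order. A standalone sketch of the same idea over plain containers (the names and types are illustrative, not LLVM's):

#include <utility>
#include <vector>

typedef std::pair<int, int> Incoming; // (predecessor id, incoming value)

// Redirect the entry for OldPred to NewPred in every "PHI". The index found
// for one PHI is tried first on the next, so when predecessor orders line
// up the loop does O(1) work per PHI instead of a linear scan.
void retargetPreds(std::vector<std::vector<Incoming> > &Phis,
                   int OldPred, int NewPred) {
  unsigned Idx = 0;
  for (unsigned p = 0, e = Phis.size(); p != e; ++p) {
    std::vector<Incoming> &PN = Phis[p];
    if (Idx >= PN.size() || PN[Idx].first != OldPred) {
      Idx = 0; // slow path: scan for OldPred
      while (Idx != PN.size() && PN[Idx].first != OldPred) ++Idx;
      if (Idx == PN.size()) continue; // OldPred feeds no entry here
    }
    PN[Idx].first = NewPred;
  }
}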
@@ -301,7 +350,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, } // Update LoopInfo if it is around. - if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>()) { + if (LI) { if (Loop *TIL = LI->getLoopFor(TIBB)) { // If one or the other blocks were not in a loop, the new block is not // either, and thus LI doesn't need to be updated. @@ -382,9 +431,8 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, } // Update ProfileInfo if it is around. - if (ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>()) { - PI->splitEdge(TIBB,DestBB,NewBB,MergeIdenticalEdges); - } + if (PI) + PI->splitEdge(TIBB, DestBB, NewBB, MergeIdenticalEdges); return NewBB; } diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index bd750cc..c80827d 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -33,7 +33,7 @@ using namespace llvm; // CloneBasicBlock - See comments in Cloning.h BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, DenseMap<const Value*, Value*> &ValueMap, - const char *NameSuffix, Function *F, + const Twine &NameSuffix, Function *F, ClonedCodeInfo *CodeInfo) { BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F); if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix); diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 92bdf2d..57ad459 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -38,20 +38,82 @@ using namespace llvm; // Local analysis. // +/// getUnderlyingObjectWithOffset - Strip off up to MaxLookup GEPs and +/// bitcasts to get back to the underlying object being addressed, keeping +/// track of the offset in bytes from the GEPs relative to the result. +/// This is closely related to Value::getUnderlyingObject but is located +/// here to avoid making VMCore depend on TargetData. +static Value *getUnderlyingObjectWithOffset(Value *V, const TargetData *TD, + uint64_t &ByteOffset, + unsigned MaxLookup = 6) { + if (!isa<PointerType>(V->getType())) + return V; + for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) { + if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { + if (!GEP->hasAllConstantIndices()) + return V; + SmallVector<Value*, 8> Indices(GEP->op_begin() + 1, GEP->op_end()); + ByteOffset += TD->getIndexedOffset(GEP->getPointerOperandType(), + &Indices[0], Indices.size()); + V = GEP->getPointerOperand(); + } else if (Operator::getOpcode(V) == Instruction::BitCast) { + V = cast<Operator>(V)->getOperand(0); + } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { + if (GA->mayBeOverridden()) + return V; + V = GA->getAliasee(); + } else { + return V; + } + assert(isa<PointerType>(V->getType()) && "Unexpected operand type!"); + } + return V; +} + /// isSafeToLoadUnconditionally - Return true if we know that executing a load /// from this value cannot trap. If it is not obviously safe to load from the /// specified pointer, we do a quick local scan of the basic block containing /// ScanFrom, to determine if the address is already accessed. -bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom) { - // If it is an alloca it is always safe to load from. 
- if (isa<AllocaInst>(V)) return true; +bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, + unsigned Align, const TargetData *TD) { + uint64_t ByteOffset = 0; + Value *Base = V; + if (TD) + Base = getUnderlyingObjectWithOffset(V, TD, ByteOffset); + + const Type *BaseType = 0; + unsigned BaseAlign = 0; + if (const AllocaInst *AI = dyn_cast<AllocaInst>(Base)) { + // An alloca is safe to load from as long as it is suitably aligned. + BaseType = AI->getAllocatedType(); + BaseAlign = AI->getAlignment(); + } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(Base)) { + // Global variables are safe to load from but their size cannot be + // guaranteed if they are overridden. + if (!isa<GlobalAlias>(GV) && !GV->mayBeOverridden()) { + BaseType = GV->getType()->getElementType(); + BaseAlign = GV->getAlignment(); + } + } - // If it is a global variable it is mostly safe to load from. - if (const GlobalValue *GV = dyn_cast<GlobalVariable>(V)) - // Don't try to evaluate aliases. External weak GV can be null. - return !isa<GlobalAlias>(GV) && !GV->hasExternalWeakLinkage(); + if (BaseType && BaseType->isSized()) { + if (TD && BaseAlign == 0) + BaseAlign = TD->getPrefTypeAlignment(BaseType); - // Otherwise, be a little bit agressive by scanning the local block where we + if (Align <= BaseAlign) { + if (!TD) + return true; // Loading directly from an alloca or global is OK. + + // Check if the load is within the bounds of the underlying object. + const PointerType *AddrTy = cast<PointerType>(V->getType()); + uint64_t LoadSize = TD->getTypeStoreSize(AddrTy->getElementType()); + if (ByteOffset + LoadSize <= TD->getTypeAllocSize(BaseType) && + (Align == 0 || (ByteOffset % Align) == 0)) + return true; + } + } + + // Otherwise, be a little bit aggressive by scanning the local block where we // want to check to see if the pointer is already being loaded or stored // from/to. If so, the previous load or store would have already trapped, // so there is no harm doing an extra load (also, CSE will later eliminate @@ -428,6 +490,17 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) { // Splice all the instructions from PredBB to DestBB. PredBB->getTerminator()->eraseFromParent(); DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList()); + + // Zap anything that took the address of DestBB. Not doing this will give the + // address an invalid value. + if (DestBB->hasAddressTaken()) { + BlockAddress *BA = BlockAddress::get(DestBB); + Constant *Replacement = + ConstantInt::get(llvm::Type::getInt32Ty(BA->getContext()), 1); + BA->replaceAllUsesWith(ConstantExpr::getIntToPtr(Replacement, + BA->getType())); + BA->destroyConstant(); + } // Anything that branched to PredBB now branches to DestBB.
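The in-bounds test introduced above, isolated as plain arithmetic. This is only a fragment of the full check: the code first requires the requested alignment not to exceed the base object's alignment.

#include <stdint.h>

// A LoadSize-byte load at ByteOffset into an object of AllocSize bytes
// cannot trap if it ends inside the object and the offset honors the
// requested alignment.
bool loadIsInBounds(uint64_t ByteOffset, uint64_t LoadSize,
                    uint64_t AllocSize, uint64_t Align) {
  return ByteOffset + LoadSize <= AllocSize &&
         (Align == 0 || ByteOffset % Align == 0);
}
// e.g. a 4-byte load at offset 12 of a 16-byte alloca passes; at offset 14
// it would run off the end and the fast path above declines.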
PredBB->replaceAllUsesWith(DestBB); diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index e81b779..57bab60 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -176,8 +176,9 @@ ReprocessLoop: SmallVector<BasicBlock*, 8> ExitBlocks; L->getExitBlocks(ExitBlocks); - SetVector<BasicBlock*> ExitBlockSet(ExitBlocks.begin(), ExitBlocks.end()); - for (SetVector<BasicBlock*>::iterator I = ExitBlockSet.begin(), + SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(), + ExitBlocks.end()); + for (SmallSetVector<BasicBlock *, 8>::iterator I = ExitBlockSet.begin(), E = ExitBlockSet.end(); I != E; ++I) { BasicBlock *ExitBlock = *I; for (pred_iterator PI = pred_begin(ExitBlock), PE = pred_end(ExitBlock); diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index 53117a0..e47c86d 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -29,7 +29,6 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" -#include <cstdio> using namespace llvm; @@ -204,15 +203,12 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) Latches.push_back(LatchBlock); for (unsigned It = 1; It != Count; ++It) { - char SuffixBuffer[100]; - sprintf(SuffixBuffer, ".%d", It); - std::vector<BasicBlock*> NewBlocks; for (std::vector<BasicBlock*>::iterator BB = LoopBlocks.begin(), E = LoopBlocks.end(); BB != E; ++BB) { ValueMapTy ValueMap; - BasicBlock *New = CloneBasicBlock(*BB, ValueMap, SuffixBuffer); + BasicBlock *New = CloneBasicBlock(*BB, ValueMap, "." + Twine(It)); Header->getParent()->getBasicBlockList().push_back(New); // Loop over all of the PHI nodes in the block, changing them to use the diff --git a/lib/Transforms/Utils/Makefile b/lib/Transforms/Utils/Makefile index b9761df..d1e9336 100644 --- a/lib/Transforms/Utils/Makefile +++ b/lib/Transforms/Utils/Makefile @@ -10,7 +10,6 @@ LEVEL = ../../.. LIBRARYNAME = LLVMTransformUtils BUILD_ARCHIVE = 1 -CXXFLAGS = -fno-rtti include $(LEVEL)/Makefile.common diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index d9261ac..544e20b 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -23,6 +23,7 @@ #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Metadata.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/AliasSetTracker.h" @@ -84,6 +85,18 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) { return true; } +/// FindAllocaDbgDeclare - Finds the llvm.dbg.declare intrinsic describing the +/// alloca 'V', if any. +static DbgDeclareInst *FindAllocaDbgDeclare(Value *V) { + if (MDNode *DebugNode = MDNode::getIfExists(V->getContext(), &V, 1)) + for (Value::use_iterator UI = DebugNode->use_begin(), + E = DebugNode->use_end(); UI != E; ++UI) + if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI)) + return DDI; + + return 0; +} + namespace { struct AllocaInfo; @@ -188,6 +201,11 @@ namespace { /// std::vector<Value*> PointerAllocaValues; + /// AllocaDbgDeclares - For each alloca, we keep track of the dbg.declare + /// intrinsic that describes it, if any, so that we can convert it to a + /// dbg.value intrinsic if the alloca gets promoted. 
+ SmallVector<DbgDeclareInst*, 8> AllocaDbgDeclares; + /// Visited - The set of basic blocks the renamer has already visited. /// SmallPtrSet<BasicBlock*, 16> Visited; @@ -202,6 +220,9 @@ namespace { PromoteMem2Reg(const std::vector<AllocaInst*> &A, DominatorTree &dt, DominanceFrontier &df, AliasSetTracker *ast) : Allocas(A), DT(dt), DF(df), DIF(0), AST(ast) {} + ~PromoteMem2Reg() { + delete DIF; + } void run(); @@ -243,9 +264,9 @@ namespace { LargeBlockInfo &LBI); void PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, LargeBlockInfo &LBI); - void ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, StoreInst* SI, - uint64_t Offset); - + void ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, StoreInst *SI); + + void RenamePass(BasicBlock *BB, BasicBlock *Pred, RenamePassData::ValVector &IncVals, std::vector<RenamePassData> &Worklist); @@ -262,6 +283,7 @@ namespace { bool OnlyUsedInOneBlock; Value *AllocaPointerVal; + DbgDeclareInst *DbgDeclare; void clear() { DefiningBlocks.clear(); @@ -270,6 +292,7 @@ namespace { OnlyBlock = 0; OnlyUsedInOneBlock = true; AllocaPointerVal = 0; + DbgDeclare = 0; } /// AnalyzeAlloca - Scan the uses of the specified alloca, filling in our @@ -304,28 +327,18 @@ namespace { OnlyUsedInOneBlock = false; } } + + DbgDeclare = FindAllocaDbgDeclare(AI); } }; } // end of anonymous namespace -/// Finds the llvm.dbg.declare intrinsic corresponding to an alloca if any. -static DbgDeclareInst *findDbgDeclare(AllocaInst *AI) { - Function *F = AI->getParent()->getParent(); - for (Function::iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI) - for (BasicBlock::iterator BI = (*FI).begin(), BE = (*FI).end(); - BI != BE; ++BI) - if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) - if (DDI->getAddress() == AI) - return DDI; - - return 0; -} - void PromoteMem2Reg::run() { Function &F = *DF.getRoot()->getParent(); if (AST) PointerAllocaValues.resize(Allocas.size()); + AllocaDbgDeclares.resize(Allocas.size()); AllocaInfo Info; LargeBlockInfo LBI; @@ -360,8 +373,11 @@ void PromoteMem2Reg::run() { // Finally, after the scan, check to see if the store is all that is left. if (Info.UsingBlocks.empty()) { - // Record debuginfo for the store before removing it. - ConvertDebugDeclareToDebugValue(findDbgDeclare(AI), Info.OnlyStore, 0); + // Record debuginfo for the store and remove the declaration's debuginfo. + if (DbgDeclareInst *DDI = Info.DbgDeclare) { + ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore); + DDI->eraseFromParent(); + } // Remove the (now dead) store and alloca. Info.OnlyStore->eraseFromParent(); LBI.deleteValue(Info.OnlyStore); @@ -388,11 +404,11 @@ void PromoteMem2Reg::run() { if (Info.UsingBlocks.empty()) { // Remove the (now dead) stores and alloca. - DbgDeclareInst *DDI = findDbgDeclare(AI); while (!AI->use_empty()) { StoreInst *SI = cast<StoreInst>(AI->use_back()); // Record debuginfo for the store before removing it. - ConvertDebugDeclareToDebugValue(DDI, SI, 0); + if (DbgDeclareInst *DDI = Info.DbgDeclare) + ConvertDebugDeclareToDebugValue(DDI, SI); SI->eraseFromParent(); LBI.deleteValue(SI); } @@ -404,6 +420,10 @@ void PromoteMem2Reg::run() { // The alloca has been processed, move on. RemoveFromAllocasList(AllocaNum); + // The alloca's debuginfo can be removed as well. + if (DbgDeclareInst *DDI = Info.DbgDeclare) + DDI->eraseFromParent(); + ++NumLocalPromoted; continue; } @@ -421,6 +441,9 @@ void PromoteMem2Reg::run() { // stored into the alloca. 
if (AST) PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal; + + // Remember the dbg.declare intrinsic describing this alloca, if any. + if (Info.DbgDeclare) AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare; // Keep the reverse mapping of the 'Allocas' array for the rename pass. AllocaLookup[Allocas[AllocaNum]] = AllocaNum; @@ -476,7 +499,11 @@ void PromoteMem2Reg::run() { A->eraseFromParent(); } - + // Remove alloca's dbg.declare intrinsics from the function. + for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i) + if (DbgDeclareInst *DDI = AllocaDbgDeclares[i]) + DDI->eraseFromParent(); + // Loop over all of the PHI nodes and see if there are any that we can get // rid of because they merge all of the same incoming values. This can // happen due to undef values coming into the PHI nodes. This process is @@ -857,14 +884,19 @@ void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, // Inserts a llvm.dbg.value intrinsic before the stores to an alloca'd value // that has an associated llvm.dbg.declare intrinsic. void PromoteMem2Reg::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, - StoreInst* SI, - uint64_t Offset) { - if (!DDI) return; + StoreInst *SI) { + DIVariable DIVar(DDI->getVariable()); + if (!DIVar.getNode()) + return; if (!DIF) DIF = new DIFactory(*SI->getParent()->getParent()->getParent()); - DIF->InsertDbgValueIntrinsic(SI->getOperand(0), Offset, - DIVariable(DDI->getVariable()), SI); + Instruction *DbgVal = DIF->InsertDbgValueIntrinsic(SI->getOperand(0), 0, + DIVar, SI); + + // Propagate any debug metadata from the store onto the dbg.value. + if (MDNode *SIMD = SI->getMetadata("dbg")) + DbgVal->setMetadata("dbg", SIMD); } // QueuePhiNode - queues a phi-node to be added to a basic-block for a specific @@ -980,7 +1012,8 @@ NextIteration: // what value were we writing? IncomingVals[ai->second] = SI->getOperand(0); // Record debuginfo for the store before removing it. - ConvertDebugDeclareToDebugValue(findDbgDeclare(Dest), SI, 0); + if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second]) + ConvertDebugDeclareToDebugValue(DDI, SI); BB->getInstList().erase(SI); } } diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index 161bf21..a31235a 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -71,6 +71,50 @@ void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) { getAvailableVals(AV)[BB] = V; } +/// IsEquivalentPHI - Check if PHI has the same incoming value as specified +/// in ValueMapping for each predecessor block. +static bool IsEquivalentPHI(PHINode *PHI, + DenseMap<BasicBlock*, Value*> &ValueMapping) { + unsigned PHINumValues = PHI->getNumIncomingValues(); + if (PHINumValues != ValueMapping.size()) + return false; + + // Scan the phi to see if it matches. + for (unsigned i = 0, e = PHINumValues; i != e; ++i) + if (ValueMapping[PHI->getIncomingBlock(i)] != + PHI->getIncomingValue(i)) { + return false; + } + + return true; +} + +/// GetExistingPHI - Check if BB already contains a phi node that is equivalent +/// to the specified mapping from predecessor blocks to incoming values. +static Value *GetExistingPHI(BasicBlock *BB, + DenseMap<BasicBlock*, Value*> &ValueMapping) { + PHINode *SomePHI; + for (BasicBlock::iterator It = BB->begin(); + (SomePHI = dyn_cast<PHINode>(It)); ++It) { + if (IsEquivalentPHI(SomePHI, ValueMapping)) + return SomePHI; + } + return 0; +} + +/// GetExistingPHI - Check if BB already contains an equivalent phi node.
+/// The InputIt type must be an iterator over std::pair<BasicBlock*, Value*> +/// objects that specify the mapping from predecessor blocks to incoming values. +template<typename InputIt> +static Value *GetExistingPHI(BasicBlock *BB, const InputIt &I, + const InputIt &E) { + // Avoid creating the mapping if BB has no phi nodes at all. + if (!isa<PHINode>(BB->begin())) + return 0; + DenseMap<BasicBlock*, Value*> ValueMapping(I, E); + return GetExistingPHI(BB, ValueMapping); +} + /// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is /// live at the end of the specified block. Value *SSAUpdater::GetValueAtEndOfBlock(BasicBlock *BB) { @@ -149,28 +193,11 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { if (SingularValue != 0) return SingularValue; - // Otherwise, we do need a PHI: check to see if we already have one available - // in this block that produces the right value. - if (isa<PHINode>(BB->begin())) { - DenseMap<BasicBlock*, Value*> ValueMapping(PredValues.begin(), - PredValues.end()); - PHINode *SomePHI; - for (BasicBlock::iterator It = BB->begin(); - (SomePHI = dyn_cast<PHINode>(It)); ++It) { - // Scan this phi to see if it is what we need. - bool Equal = true; - for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) - if (ValueMapping[SomePHI->getIncomingBlock(i)] != - SomePHI->getIncomingValue(i)) { - Equal = false; - break; - } - - if (Equal) - return SomePHI; - } - } - + // Otherwise, we do need a PHI. + if (Value *ExistingPHI = GetExistingPHI(BB, PredValues.begin(), + PredValues.end())) + return ExistingPHI; + // Ok, we have no way out, insert a new one now. PHINode *InsertedPHI = PHINode::Create(PrototypeValue->getType(), PrototypeValue->getName(), @@ -255,7 +282,7 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) { // producing the same value. If so, this value will capture it, if not, it // will get reset to null. We distinguish the no-predecessor case explicitly // below. - TrackingVH<Value> SingularValue; + TrackingVH<Value> ExistingValue; // We can get our predecessor info by walking the pred_iterator list, but it // is relatively slow. If we already have PHI nodes in this block, walk one @@ -266,11 +293,11 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) { Value *PredVal = GetValueAtEndOfBlockInternal(PredBB); IncomingPredInfo.push_back(std::make_pair(PredBB, PredVal)); - // Compute SingularValue. + // Set ExistingValue to singular value from all predecessors so far. if (i == 0) - SingularValue = PredVal; - else if (PredVal != SingularValue) - SingularValue = 0; + ExistingValue = PredVal; + else if (PredVal != ExistingValue) + ExistingValue = 0; } } else { bool isFirstPred = true; @@ -279,12 +306,12 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) { Value *PredVal = GetValueAtEndOfBlockInternal(PredBB); IncomingPredInfo.push_back(std::make_pair(PredBB, PredVal)); - // Compute SingularValue. + // Set ExistingValue to singular value from all predecessors so far. if (isFirstPred) { - SingularValue = PredVal; + ExistingValue = PredVal; isFirstPred = false; - } else if (PredVal != SingularValue) - SingularValue = 0; + } else if (PredVal != ExistingValue) + ExistingValue = 0; } } @@ -300,31 +327,38 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) { /// above.
TrackingVH<Value> &InsertedVal = AvailableVals[BB]; - // If all the predecessor values are the same then we don't need to insert a + // If the predecessor values are not all the same, then check to see if there + // is an existing PHI that can be used. + if (!ExistingValue) + ExistingValue = GetExistingPHI(BB, + IncomingPredInfo.begin()+FirstPredInfoEntry, + IncomingPredInfo.end()); + + // If there is an existing value we can use, then we don't need to insert a // PHI. This is the simple and common case. - if (SingularValue) { - // If a PHI node got inserted, replace it with the singlar value and delete + if (ExistingValue) { + // If a PHI node got inserted, replace it with the existing value and delete // it. if (InsertedVal) { PHINode *OldVal = cast<PHINode>(InsertedVal); // Be careful about dead loops. These RAUW's also update InsertedVal. - if (InsertedVal != SingularValue) - OldVal->replaceAllUsesWith(SingularValue); + if (InsertedVal != ExistingValue) + OldVal->replaceAllUsesWith(ExistingValue); else OldVal->replaceAllUsesWith(UndefValue::get(InsertedVal->getType())); OldVal->eraseFromParent(); } else { - InsertedVal = SingularValue; + InsertedVal = ExistingValue; } - // Either path through the 'if' should have set insertedVal -> SingularVal. - assert((InsertedVal == SingularValue || isa<UndefValue>(InsertedVal)) && - "RAUW didn't change InsertedVal to be SingularVal"); + // Either path through the 'if' should have set InsertedVal -> ExistingVal. + assert((InsertedVal == ExistingValue || isa<UndefValue>(InsertedVal)) && + "RAUW didn't change InsertedVal to be ExistingValue"); // Drop the entries we added in IncomingPredInfo to restore the stack. IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry, IncomingPredInfo.end()); - return SingularValue; + return ExistingValue; } // Otherwise, we do need a PHI: insert one now if we don't already have one. diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index cb53296..2215059 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -23,6 +23,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Target/TargetData.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" @@ -36,6 +37,28 @@ using namespace llvm; STATISTIC(NumSpeculations, "Number of speculative executed instructions"); +namespace { +class SimplifyCFGOpt { + const TargetData *const TD; + + ConstantInt *GetConstantInt(Value *V); + Value *GatherConstantSetEQs(Value *V, std::vector<ConstantInt*> &Values); + Value *GatherConstantSetNEs(Value *V, std::vector<ConstantInt*> &Values); + bool GatherValueComparisons(Instruction *Cond, Value *&CompVal, + std::vector<ConstantInt*> &Values); + Value *isValueEqualityComparison(TerminatorInst *TI); + BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI, + std::vector<std::pair<ConstantInt*, BasicBlock*> > &Cases); + bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, + BasicBlock *Pred); + bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI); + +public: + explicit SimplifyCFGOpt(const TargetData *td) : TD(td) {} + bool run(BasicBlock *BB); +}; +} + /// SafeToMergeTerminators - Return true if it is safe to merge these two /// terminator instructions together. 
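The equivalence test factored out in the SSAUpdater hunks above has this shape in plain C++ (illustrative containers; LLVM uses a DenseMap keyed by predecessor block):

#include <map>
#include <utility>
#include <vector>

// A "PHI" (a list of predecessor -> value entries) matches a wanted mapping
// iff the entry counts agree and every entry agrees with the mapping.
template <class Block, class Val>
bool equivalentPhi(const std::vector<std::pair<Block, Val> > &PhiEntries,
                   const std::map<Block, Val> &Wanted) {
  if (PhiEntries.size() != Wanted.size())
    return false;
  for (unsigned i = 0, e = PhiEntries.size(); i != e; ++i) {
    typename std::map<Block, Val>::const_iterator It =
        Wanted.find(PhiEntries[i].first);
    if (It == Wanted.end() || It->second != PhiEntries[i].second)
      return false;
  }
  return true;
}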
/// @@ -243,17 +266,48 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, return true; } +/// GetConstantInt - Extract ConstantInt from value, looking through IntToPtr +/// and PointerNullValue. Return NULL if value is not a constant int. +ConstantInt *SimplifyCFGOpt::GetConstantInt(Value *V) { + // Normal constant int. + ConstantInt *CI = dyn_cast<ConstantInt>(V); + if (CI || !TD || !isa<Constant>(V) || !isa<PointerType>(V->getType())) + return CI; + + // This is some kind of pointer constant. Turn it into a pointer-sized + // ConstantInt if possible. + const IntegerType *PtrTy = TD->getIntPtrType(V->getContext()); + + // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*). + if (isa<ConstantPointerNull>(V)) + return ConstantInt::get(PtrTy, 0); + + // IntToPtr const int. + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) + if (CE->getOpcode() == Instruction::IntToPtr) + if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) { + // The constant is very likely to have the right type already. + if (CI->getType() == PtrTy) + return CI; + else + return cast<ConstantInt> + (ConstantExpr::getIntegerCast(CI, PtrTy, /*isSigned=*/false)); + } + return 0; +} + /// GatherConstantSetEQs - Given a potentially 'or'd together collection of /// icmp_eq instructions that compare a value against a constant, return the /// value being compared, and stick the constant into the Values vector. -static Value *GatherConstantSetEQs(Value *V, std::vector<ConstantInt*> &Values){ +Value *SimplifyCFGOpt:: +GatherConstantSetEQs(Value *V, std::vector<ConstantInt*> &Values) { if (Instruction *Inst = dyn_cast<Instruction>(V)) { if (Inst->getOpcode() == Instruction::ICmp && cast<ICmpInst>(Inst)->getPredicate() == ICmpInst::ICMP_EQ) { - if (ConstantInt *C = dyn_cast<ConstantInt>(Inst->getOperand(1))) { + if (ConstantInt *C = GetConstantInt(Inst->getOperand(1))) { Values.push_back(C); return Inst->getOperand(0); - } else if (ConstantInt *C = dyn_cast<ConstantInt>(Inst->getOperand(0))) { + } else if (ConstantInt *C = GetConstantInt(Inst->getOperand(0))) { Values.push_back(C); return Inst->getOperand(1); } @@ -270,14 +324,15 @@ static Value *GatherConstantSetEQs(Value *V, std::vector<ConstantInt*> &Values){ /// GatherConstantSetNEs - Given a potentially 'and'd together collection of /// setne instructions that compare a value against a constant, return the value /// being compared, and stick the constant into the Values vector. -static Value *GatherConstantSetNEs(Value *V, std::vector<ConstantInt*> &Values){ +Value *SimplifyCFGOpt:: +GatherConstantSetNEs(Value *V, std::vector<ConstantInt*> &Values) { if (Instruction *Inst = dyn_cast<Instruction>(V)) { if (Inst->getOpcode() == Instruction::ICmp && cast<ICmpInst>(Inst)->getPredicate() == ICmpInst::ICMP_NE) { - if (ConstantInt *C = dyn_cast<ConstantInt>(Inst->getOperand(1))) { + if (ConstantInt *C = GetConstantInt(Inst->getOperand(1))) { Values.push_back(C); return Inst->getOperand(0); - } else if (ConstantInt *C = dyn_cast<ConstantInt>(Inst->getOperand(0))) { + } else if (ConstantInt *C = GetConstantInt(Inst->getOperand(0))) { Values.push_back(C); return Inst->getOperand(1); } @@ -294,8 +349,8 @@ static Value *GatherConstantSetNEs(Value *V, std::vector<ConstantInt*> &Values){ /// GatherValueComparisons - If the specified Cond is an 'and' or 'or' of a /// bunch of comparisons of one value against constants, return the value and /// the constants being compared. 
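GetConstantInt above widens the notion of "constant integer" for value-equality switches. A hypothetical standalone model of the pointer cases it accepts (the names, flags, and 64-bit cap are illustrative, not LLVM API):

#include <stdint.h>

// Null reads as 0; an inttoptr'd literal reads as its integer adjusted to
// pointer width (an unsigned cast, matching isSigned=false above); any
// other pointer constant is not usable as a switch case value.
bool pointerCaseValue(bool IsNull, bool IsIntToPtr, uint64_t Literal,
                      unsigned PtrBits, uint64_t &CaseValue) {
  if (IsNull) { CaseValue = 0; return true; }
  if (IsIntToPtr) {
    uint64_t Mask =
        PtrBits >= 64 ? ~uint64_t(0) : (uint64_t(1) << PtrBits) - 1;
    CaseValue = Literal & Mask; // zext/trunc to pointer width
    return true;
  }
  return false;
}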
-static bool GatherValueComparisons(Instruction *Cond, Value *&CompVal, - std::vector<ConstantInt*> &Values) { +bool SimplifyCFGOpt::GatherValueComparisons(Instruction *Cond, Value *&CompVal, + std::vector<ConstantInt*> &Values) { if (Cond->getOpcode() == Instruction::Or) { CompVal = GatherConstantSetEQs(Cond, Values); @@ -327,29 +382,32 @@ static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) { /// isValueEqualityComparison - Return true if the specified terminator checks /// to see if a value is equal to constant integer value. -static Value *isValueEqualityComparison(TerminatorInst *TI) { +Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) { + Value *CV = 0; if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { // Do not permit merging of large switch instructions into their // predecessors unless there is only one predecessor. - if (SI->getNumSuccessors() * std::distance(pred_begin(SI->getParent()), - pred_end(SI->getParent())) > 128) - return 0; - - return SI->getCondition(); - } - if (BranchInst *BI = dyn_cast<BranchInst>(TI)) + if (SI->getNumSuccessors()*std::distance(pred_begin(SI->getParent()), + pred_end(SI->getParent())) <= 128) + CV = SI->getCondition(); + } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) if (BI->isConditional() && BI->getCondition()->hasOneUse()) if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) if ((ICI->getPredicate() == ICmpInst::ICMP_EQ || ICI->getPredicate() == ICmpInst::ICMP_NE) && - isa<ConstantInt>(ICI->getOperand(1))) - return ICI->getOperand(0); - return 0; + GetConstantInt(ICI->getOperand(1))) + CV = ICI->getOperand(0); + + // Unwrap any lossless ptrtoint cast. + if (TD && CV && CV->getType() == TD->getIntPtrType(CV->getContext())) + if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) + CV = PTII->getOperand(0); + return CV; } /// GetValueEqualityComparisonCases - Given a value comparison instruction, /// decode all of the 'cases' that it represents and return the 'default' block. -static BasicBlock * +BasicBlock *SimplifyCFGOpt:: GetValueEqualityComparisonCases(TerminatorInst *TI, std::vector<std::pair<ConstantInt*, BasicBlock*> > &Cases) { @@ -362,7 +420,7 @@ GetValueEqualityComparisonCases(TerminatorInst *TI, BranchInst *BI = cast<BranchInst>(TI); ICmpInst *ICI = cast<ICmpInst>(BI->getCondition()); - Cases.push_back(std::make_pair(cast<ConstantInt>(ICI->getOperand(1)), + Cases.push_back(std::make_pair(GetConstantInt(ICI->getOperand(1)), BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_NE))); return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ); @@ -421,8 +479,9 @@ ValuesOverlap(std::vector<std::pair<ConstantInt*, BasicBlock*> > &C1, /// comparison with the same value, and if that comparison determines the /// outcome of this comparison. If so, simplify TI. This does a very limited /// form of jump threading. -static bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, - BasicBlock *Pred) { +bool SimplifyCFGOpt:: +SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, + BasicBlock *Pred) { Value *PredVal = isValueEqualityComparison(Pred->getTerminator()); if (!PredVal) return false; // Not a value comparison in predecessor. @@ -548,7 +607,7 @@ namespace { /// equality comparison instruction (either a switch or a branch on "X == c"). /// See if any of the predecessors of the terminator block are value comparisons /// on the same value. If so, and if safe to do so, fold them together. 
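At the source level, the gathering done by these routines is what lets SimplifyCFG rebuild a chain of equality tests as a switch; a small illustrative example:

int classify(int X) {
  // An 'or' of icmp-eq tests against constants on the same value...
  if (X == 1 || X == 4 || X == 9)
    return 1;
  // ...can be rebuilt as: switch (X) { case 1: case 4: case 9: return 1; }
  return 0;
}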
-static bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI) { +bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI) { BasicBlock *BB = TI->getParent(); Value *CV = isValueEqualityComparison(TI); // CondVal assert(CV && "Not a comparison?"); @@ -641,6 +700,13 @@ static bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI) { for (unsigned i = 0, e = NewSuccessors.size(); i != e; ++i) AddPredecessorToBlock(NewSuccessors[i], Pred, BB); + // Convert pointer to int before we switch. + if (isa<PointerType>(CV->getType())) { + assert(TD && "Cannot switch on pointer without TargetData"); + CV = new PtrToIntInst(CV, TD->getIntPtrType(CV->getContext()), + "magicptr", PTI); + } + // Now that the successors are updated, create the new Switch instruction. SwitchInst *NewSI = SwitchInst::Create(CV, PredDefault, PredCases.size(), PTI); @@ -1011,7 +1077,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI) { for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { ConstantInt *CB; if ((CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i))) && - CB->getType()->isInteger(1)) { + CB->getType()->isIntegerTy(1)) { // Okay, we now know that all edges from PredBB should be revectored to // branch to RealDest. BasicBlock *PredBB = PN->getIncomingBlock(i); @@ -1589,14 +1655,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { return true; } -/// SimplifyCFG - This function is used to do simplification of a CFG. For -/// example, it adjusts branches to branches to eliminate the extra hop, it -/// eliminates unreachable basic blocks, and does other "peephole" optimization -/// of the CFG. It returns true if a modification was made. -/// -/// WARNING: The entry node of a function may not be simplified. -/// -bool llvm::SimplifyCFG(BasicBlock *BB) { +bool SimplifyCFGOpt::run(BasicBlock *BB) { bool Changed = false; Function *M = BB->getParent(); @@ -1997,7 +2056,7 @@ bool llvm::SimplifyCFG(BasicBlock *BB) { Value *CompVal = 0; std::vector<ConstantInt*> Values; bool TrueWhenEqual = GatherValueComparisons(Cond, CompVal, Values); - if (CompVal && CompVal->getType()->isInteger()) { + if (CompVal) { // There might be duplicate constants in the list, which the switch // instruction can't handle, remove them now. std::sort(Values.begin(), Values.end(), ConstantIntOrdering()); @@ -2008,6 +2067,14 @@ bool llvm::SimplifyCFG(BasicBlock *BB) { BasicBlock *EdgeBB = BI->getSuccessor(0); if (!TrueWhenEqual) std::swap(DefaultBB, EdgeBB); + // Convert pointer to int before we switch. + if (isa<PointerType>(CompVal->getType())) { + assert(TD && "Cannot switch on pointer without TargetData"); + CompVal = new PtrToIntInst(CompVal, + TD->getIntPtrType(CompVal->getContext()), + "magicptr", BI); + } + // Create the new switch instruction now. SwitchInst *New = SwitchInst::Create(CompVal, DefaultBB, Values.size(), BI); @@ -2035,3 +2102,14 @@ bool llvm::SimplifyCFG(BasicBlock *BB) { return Changed; } + +/// SimplifyCFG - This function is used to do simplification of a CFG. For +/// example, it adjusts branches to branches to eliminate the extra hop, it +/// eliminates unreachable basic blocks, and does other "peephole" optimization +/// of the CFG. It returns true if a modification was made. +/// +/// WARNING: The entry node of a function may not be simplified. 
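With GetConstantInt handling null and inttoptr constants, the same switch conversion now applies to pointers: the single ptrtoint inserted above ("magicptr") feeds the switch. Illustrative source, with a made-up sentinel address and a 64-bit pointer assumed:

bool isSentinel(const char *P) {
  // Equality tests of one pointer against null/inttoptr constants...
  return P == 0 || P == reinterpret_cast<const char *>(0xdead);
  // ...become, given TargetData: %magicptr = ptrtoint i8* %P to i64
  //                              switch i64 %magicptr ... case 0, case 0xdead
}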
+/// +bool llvm::SimplifyCFG(BasicBlock *BB, const TargetData *TD) { + return SimplifyCFGOpt(TD).run(BB); +} diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp index a6e6701..6045048 100644 --- a/lib/Transforms/Utils/ValueMapper.cpp +++ b/lib/Transforms/Utils/ValueMapper.cpp @@ -35,7 +35,7 @@ Value *llvm::MapValue(const Value *V, ValueMapTy &VM) { if (const MDNode *MD = dyn_cast<MDNode>(V)) { SmallVector<Value*, 4> Elts; - for (unsigned i = 0; i != MD->getNumOperands(); i++) + for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) Elts.push_back(MD->getOperand(i) ? MapValue(MD->getOperand(i), VM) : 0); return VM[V] = MDNode::get(V->getContext(), Elts.data(), Elts.size()); } diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp index c9f3849..f5ba7e7 100644 --- a/lib/VMCore/AsmWriter.cpp +++ b/lib/VMCore/AsmWriter.cpp @@ -27,6 +27,7 @@ #include "llvm/ValueSymbolTable.h" #include "llvm/TypeSymbolTable.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/CFG.h" @@ -238,6 +239,19 @@ void TypePrinting::CalcTypeName(const Type *Ty, OS << '>'; break; } + case Type::UnionTyID: { + const UnionType *UTy = cast<UnionType>(Ty); + OS << "union { "; + for (StructType::element_iterator I = UTy->element_begin(), + E = UTy->element_end(); I != E; ++I) { + CalcTypeName(*I, TypeStack, OS); + if (next(I) != UTy->element_end()) + OS << ','; + OS << ' '; + } + OS << '}'; + break; + } case Type::PointerTyID: { const PointerType *PTy = cast<PointerType>(Ty); CalcTypeName(PTy->getElementType(), TypeStack, OS); @@ -417,13 +431,13 @@ static void AddModuleTypesToPrinter(TypePrinting &TP, // they are used too often to have a single useful name. if (const PointerType *PTy = dyn_cast<PointerType>(Ty)) { const Type *PETy = PTy->getElementType(); - if ((PETy->isPrimitiveType() || PETy->isInteger()) && + if ((PETy->isPrimitiveType() || PETy->isIntegerTy()) && !isa<OpaqueType>(PETy)) continue; } // Likewise don't insert primitives either. - if (Ty->isInteger() || Ty->isPrimitiveType()) + if (Ty->isIntegerTy() || Ty->isPrimitiveType()) continue; // Get the name as a string and insert it into TypeNames. @@ -835,7 +849,7 @@ static void WriteOptimizationInfo(raw_ostream &Out, const User *U) { static void WriteConstantInt(raw_ostream &Out, const Constant *CV, TypePrinting &TypePrinter, SlotTracker *Machine) { if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { - if (CI->getType()->isInteger(1)) { + if (CI->getType()->isIntegerTy(1)) { Out << (CI->getZExtValue() ? "true" : "false"); return; } @@ -855,7 +869,8 @@ static void WriteConstantInt(raw_ostream &Out, const Constant *CV, bool isDouble = &CFP->getValueAPF().getSemantics()==&APFloat::IEEEdouble; double Val = isDouble ? CFP->getValueAPF().convertToDouble() : CFP->getValueAPF().convertToFloat(); - std::string StrVal = ftostr(CFP->getValueAPF()); + SmallString<128> StrVal; + raw_svector_ostream(StrVal) << Val; // Check to make sure that the stringized number is not some string like // "Inf" or NaN, that atof will accept, but the lexer will not. Check @@ -866,7 +881,7 @@ static void WriteConstantInt(raw_ostream &Out, const Constant *CV, (StrVal[1] >= '0' && StrVal[1] <= '9'))) { // Reparse stringized version! 
if (atof(StrVal.c_str()) == Val) { - Out << StrVal; + Out << StrVal.str(); return; } } @@ -1250,15 +1265,14 @@ public: void printArgument(const Argument *FA, Attributes Attrs); void printBasicBlock(const BasicBlock *BB); void printInstruction(const Instruction &I); -private: +private: // printInfoComment - Print a little comment after the instruction indicating // which slot it occupies. void printInfoComment(const Value &V); }; } // end of anonymous namespace - void AssemblyWriter::writeOperand(const Value *Operand, bool PrintType) { if (Operand == 0) { Out << "<null operand!>"; @@ -1402,8 +1416,6 @@ static void PrintLinkage(GlobalValue::LinkageTypes LT, case GlobalValue::AvailableExternallyLinkage: Out << "available_externally "; break; - // This is invalid syntax and just a debugging aid. - case GlobalValue::GhostLinkage: Out << "ghost "; break; } } @@ -1418,6 +1430,9 @@ static void PrintVisibility(GlobalValue::VisibilityTypes Vis, } void AssemblyWriter::printGlobal(const GlobalVariable *GV) { + if (GV->isMaterializable()) + Out << "; Materializable\n"; + WriteAsOperandInternal(Out, GV, &TypePrinter, &Machine); Out << " = "; @@ -1448,6 +1463,9 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) { } void AssemblyWriter::printAlias(const GlobalAlias *GA) { + if (GA->isMaterializable()) + Out << "; Materializable\n"; + // Don't crash when dumping partially built GA if (!GA->hasName()) Out << "<<nameless>> = "; @@ -1521,6 +1539,9 @@ void AssemblyWriter::printFunction(const Function *F) { if (AnnotationWriter) AnnotationWriter->emitFunctionAnnot(F, Out); + if (F->isMaterializable()) + Out << "; Materializable\n"; + if (F->isDeclaration()) Out << "declare "; else @@ -1680,11 +1701,15 @@ void AssemblyWriter::printBasicBlock(const BasicBlock *BB) { if (AnnotationWriter) AnnotationWriter->emitBasicBlockEndAnnot(BB, Out); } - /// printInfoComment - Print a little comment after the instruction indicating /// which slot it occupies. /// void AssemblyWriter::printInfoComment(const Value &V) { + if (AnnotationWriter) { + AnnotationWriter->printInfoComment(V, Out); + return; + } + if (V.getType()->isVoidTy()) return; Out.PadToColumn(50); diff --git a/lib/VMCore/Attributes.cpp b/lib/VMCore/Attributes.cpp index 65155f1..ff0cc9b 100644 --- a/lib/VMCore/Attributes.cpp +++ b/lib/VMCore/Attributes.cpp @@ -56,6 +56,8 @@ std::string Attribute::getAsString(Attributes Attrs) { Result += "optsize "; if (Attrs & Attribute::NoInline) Result += "noinline "; + if (Attrs & Attribute::InlineHint) + Result += "inlinehint "; if (Attrs & Attribute::AlwaysInline) Result += "alwaysinline "; if (Attrs & Attribute::StackProtect) @@ -68,6 +70,11 @@ std::string Attribute::getAsString(Attributes Attrs) { Result += "noimplicitfloat "; if (Attrs & Attribute::Naked) Result += "naked "; + if (Attrs & Attribute::StackAlignment) { + Result += "alignstack("; + Result += utostr(Attribute::getStackAlignmentFromAttrs(Attrs)); + Result += ") "; + } if (Attrs & Attribute::Alignment) { Result += "align "; Result += utostr(Attribute::getAlignmentFromAttrs(Attrs)); @@ -82,7 +89,7 @@ std::string Attribute::getAsString(Attributes Attrs) { Attributes Attribute::typeIncompatible(const Type *Ty) { Attributes Incompatible = None; - if (!Ty->isInteger()) + if (!Ty->isIntegerTy()) // Attributes that only apply to integers. 
Incompatible |= SExt | ZExt; diff --git a/lib/VMCore/CMakeLists.txt b/lib/VMCore/CMakeLists.txt index 5ecedf1..4b80e36 100644 --- a/lib/VMCore/CMakeLists.txt +++ b/lib/VMCore/CMakeLists.txt @@ -8,17 +8,17 @@ add_llvm_library(LLVMCore Core.cpp Dominators.cpp Function.cpp + GVMaterializer.cpp Globals.cpp + IRBuilder.cpp InlineAsm.cpp Instruction.cpp Instructions.cpp IntrinsicInst.cpp - IRBuilder.cpp LLVMContext.cpp LeakDetector.cpp Metadata.cpp Module.cpp - ModuleProvider.cpp Pass.cpp PassManager.cpp PrintModulePass.cpp diff --git a/lib/VMCore/ConstantFold.cpp b/lib/VMCore/ConstantFold.cpp index ddd5587..78a45e8 100644 --- a/lib/VMCore/ConstantFold.cpp +++ b/lib/VMCore/ConstantFold.cpp @@ -24,7 +24,6 @@ #include "llvm/Function.h" #include "llvm/GlobalAlias.h" #include "llvm/GlobalVariable.h" -#include "llvm/LLVMContext.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" @@ -41,7 +40,7 @@ using namespace llvm; /// BitCastConstantVector - Convert the specified ConstantVector node to the /// specified vector type. At this point, we know that the elements of the /// input vector constant are all simple integer or FP values. -static Constant *BitCastConstantVector(LLVMContext &Context, ConstantVector *CV, +static Constant *BitCastConstantVector(ConstantVector *CV, const VectorType *DstTy) { // If this cast changes element count then we can't handle it here: // doing so requires endianness information. This should be handled by @@ -91,8 +90,7 @@ foldConstantCastPair( Type::getInt64Ty(DstTy->getContext())); } -static Constant *FoldBitCast(LLVMContext &Context, - Constant *V, const Type *DestTy) { +static Constant *FoldBitCast(Constant *V, const Type *DestTy) { const Type *SrcTy = V->getType(); if (SrcTy == DestTy) return V; // no-op cast @@ -103,7 +101,8 @@ static Constant *FoldBitCast(LLVMContext &Context, if (const PointerType *DPTy = dyn_cast<PointerType>(DestTy)) if (PTy->getAddressSpace() == DPTy->getAddressSpace()) { SmallVector<Value*, 8> IdxList; - Value *Zero = Constant::getNullValue(Type::getInt32Ty(Context)); + Value *Zero = + Constant::getNullValue(Type::getInt32Ty(DPTy->getContext())); IdxList.push_back(Zero); const Type *ElTy = PTy->getElementType(); while (ElTy != DPTy->getElementType()) { @@ -139,15 +138,14 @@ static Constant *FoldBitCast(LLVMContext &Context, return Constant::getNullValue(DestTy); if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) - return BitCastConstantVector(Context, CV, DestPTy); + return BitCastConstantVector(CV, DestPTy); } // Canonicalize scalar-to-vector bitcasts into vector-to-vector bitcasts // This allows for other simplifications (although some of them // can only be handled by Analysis/ConstantFolding.cpp). if (isa<ConstantInt>(V) || isa<ConstantFP>(V)) - return ConstantExpr::getBitCast( - ConstantVector::get(&V, 1), DestPTy); + return ConstantExpr::getBitCast(ConstantVector::get(&V, 1), DestPTy); } // Finally, implement bitcast folding now. The code below doesn't handle @@ -157,23 +155,24 @@ static Constant *FoldBitCast(LLVMContext &Context, // Handle integral constant input. if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { - if (DestTy->isInteger()) + if (DestTy->isIntegerTy()) // Integral -> Integral. This is a no-op because the bit widths must // be the same. Consequently, we just fold to V. 
return V; - if (DestTy->isFloatingPoint()) - return ConstantFP::get(Context, APFloat(CI->getValue(), - DestTy != Type::getPPC_FP128Ty(Context))); + if (DestTy->isFloatingPointTy()) + return ConstantFP::get(DestTy->getContext(), + APFloat(CI->getValue(), + !DestTy->isPPC_FP128Ty())); // Otherwise, can't fold this (vector?) return 0; } - // Handle ConstantFP input. + // Handle ConstantFP input: FP -> Integral. if (ConstantFP *FP = dyn_cast<ConstantFP>(V)) - // FP -> Integral. - return ConstantInt::get(Context, FP->getValueAPF().bitcastToAPInt()); + return ConstantInt::get(FP->getContext(), + FP->getValueAPF().bitcastToAPInt()); return 0; } @@ -323,9 +322,195 @@ static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart, } } +/// getFoldedSizeOf - Return a ConstantExpr with type DestTy for sizeof +/// on Ty, with any known factors factored out. If Folded is false, +/// return null if no factoring was possible, to avoid endlessly +/// bouncing an unfoldable expression back into the top-level folder. +/// +static Constant *getFoldedSizeOf(const Type *Ty, const Type *DestTy, + bool Folded) { + if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { + Constant *N = ConstantInt::get(DestTy, ATy->getNumElements()); + Constant *E = getFoldedSizeOf(ATy->getElementType(), DestTy, true); + return ConstantExpr::getNUWMul(E, N); + } + if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) { + Constant *N = ConstantInt::get(DestTy, VTy->getNumElements()); + Constant *E = getFoldedSizeOf(VTy->getElementType(), DestTy, true); + return ConstantExpr::getNUWMul(E, N); + } + if (const StructType *STy = dyn_cast<StructType>(Ty)) + if (!STy->isPacked()) { + unsigned NumElems = STy->getNumElements(); + // An empty struct has size zero. + if (NumElems == 0) + return ConstantExpr::getNullValue(DestTy); + // Check for a struct with all members having the same size. + Constant *MemberSize = + getFoldedSizeOf(STy->getElementType(0), DestTy, true); + bool AllSame = true; + for (unsigned i = 1; i != NumElems; ++i) + if (MemberSize != + getFoldedSizeOf(STy->getElementType(i), DestTy, true)) { + AllSame = false; + break; + } + if (AllSame) { + Constant *N = ConstantInt::get(DestTy, NumElems); + return ConstantExpr::getNUWMul(MemberSize, N); + } + } + + // Pointer size doesn't depend on the pointee type, so canonicalize them + // to an arbitrary pointee. + if (const PointerType *PTy = dyn_cast<PointerType>(Ty)) + if (!PTy->getElementType()->isIntegerTy(1)) + return + getFoldedSizeOf(PointerType::get(IntegerType::get(PTy->getContext(), 1), + PTy->getAddressSpace()), + DestTy, true); + + // If there's no interesting folding happening, bail so that we don't create + // a constant that looks like it needs folding but really doesn't. + if (!Folded) + return 0; + + // Base case: Get a regular sizeof expression. + Constant *C = ConstantExpr::getSizeOf(Ty); + C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false, + DestTy, false), + C, DestTy); + return C; +} + +/// getFoldedAlignOf - Return a ConstantExpr with type DestTy for alignof +/// on Ty, with any known factors factored out. If Folded is false, +/// return null if no factoring was possible, to avoid endlessly +/// bouncing an unfoldable expression back into the top-level folder. +/// +static Constant *getFoldedAlignOf(const Type *Ty, const Type *DestTy, + bool Folded) { + // The alignment of an array is equal to the alignment of the + // array element. Note that this is not always true for vectors. 
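// getFoldedSizeOf (above) rewrites sizeof([N x T]) as N * sizeof(T) using a
// no-unsigned-wrap multiply, keeping common factors visible to later folds.
// A worked example as it would read inside ConstantFold.cpp, where the static
// helper is in scope (the i32/i64 choice is just for illustration):
static Constant *sizeOfEightI32s(const Type *Int32Ty, const Type *I64Ty) {
  Constant *N = ConstantInt::get(I64Ty, 8);              // element count
  Constant *E = getFoldedSizeOf(Int32Ty, I64Ty, true);   // sizeof(i32) as i64
  return ConstantExpr::getNUWMul(E, N);                  // mul nuw E, 8
}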
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { + Constant *C = ConstantExpr::getAlignOf(ATy->getElementType()); + C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false, + DestTy, + false), + C, DestTy); + return C; + } + + if (const StructType *STy = dyn_cast<StructType>(Ty)) { + // Packed structs always have an alignment of 1. + if (STy->isPacked()) + return ConstantInt::get(DestTy, 1); + + // Otherwise, struct alignment is the maximum alignment of any member. + // Without target data, we can't compare much, but we can check to see + // if all the members have the same alignment. + unsigned NumElems = STy->getNumElements(); + // An empty struct has minimal alignment. + if (NumElems == 0) + return ConstantInt::get(DestTy, 1); + // Check for a struct with all members having the same alignment. + Constant *MemberAlign = + getFoldedAlignOf(STy->getElementType(0), DestTy, true); + bool AllSame = true; + for (unsigned i = 1; i != NumElems; ++i) + if (MemberAlign != getFoldedAlignOf(STy->getElementType(i), DestTy, true)) { + AllSame = false; + break; + } + if (AllSame) + return MemberAlign; + } + + // Pointer alignment doesn't depend on the pointee type, so canonicalize them + // to an arbitrary pointee. + if (const PointerType *PTy = dyn_cast<PointerType>(Ty)) + if (!PTy->getElementType()->isIntegerTy(1)) + return + getFoldedAlignOf(PointerType::get(IntegerType::get(PTy->getContext(), + 1), + PTy->getAddressSpace()), + DestTy, true); + + // If there's no interesting folding happening, bail so that we don't create + // a constant that looks like it needs folding but really doesn't. + if (!Folded) + return 0; + + // Base case: Get a regular alignof expression. + Constant *C = ConstantExpr::getAlignOf(Ty); + C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false, + DestTy, false), + C, DestTy); + return C; +} + +/// getFoldedOffsetOf - Return a ConstantExpr with type DestTy for offsetof +/// on Ty and FieldNo, with any known factors factored out. If Folded is false, +/// return null if no factoring was possible, to avoid endlessly +/// bouncing an unfoldable expression back into the top-level folder. +/// +static Constant *getFoldedOffsetOf(const Type *Ty, Constant *FieldNo, + const Type *DestTy, + bool Folded) { + if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { + Constant *N = ConstantExpr::getCast(CastInst::getCastOpcode(FieldNo, false, + DestTy, false), + FieldNo, DestTy); + Constant *E = getFoldedSizeOf(ATy->getElementType(), DestTy, true); + return ConstantExpr::getNUWMul(E, N); + } + if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) { + Constant *N = ConstantExpr::getCast(CastInst::getCastOpcode(FieldNo, false, + DestTy, false), + FieldNo, DestTy); + Constant *E = getFoldedSizeOf(VTy->getElementType(), DestTy, true); + return ConstantExpr::getNUWMul(E, N); + } + if (const StructType *STy = dyn_cast<StructType>(Ty)) + if (!STy->isPacked()) { + unsigned NumElems = STy->getNumElements(); + // An empty struct has no members. + if (NumElems == 0) + return 0; + // Check for a struct with all members having the same size. 
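// getFoldedOffsetOf (above) treats arrays and vectors alike: the offset of
// element FieldNo in [N x T] is FieldNo * sizeof(T), built with the same nuw
// multiply after widening FieldNo to the destination type. Condensed sketch
// of the array case, in the same in-file setting as the helpers above:
static Constant *offsetOfArrayElt(const ArrayType *ATy, Constant *FieldNo,
                                  const Type *DestTy) {
  Constant *N = ConstantExpr::getCast(
      CastInst::getCastOpcode(FieldNo, false, DestTy, false),
      FieldNo, DestTy);                                     // widened index
  Constant *E = getFoldedSizeOf(ATy->getElementType(), DestTy, true);
  return ConstantExpr::getNUWMul(E, N);                     // sizeof(T) * idx
}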
+ Constant *MemberSize = + getFoldedSizeOf(STy->getElementType(0), DestTy, true); + bool AllSame = true; + for (unsigned i = 1; i != NumElems; ++i) + if (MemberSize != + getFoldedSizeOf(STy->getElementType(i), DestTy, true)) { + AllSame = false; + break; + } + if (AllSame) { + Constant *N = ConstantExpr::getCast(CastInst::getCastOpcode(FieldNo, + false, + DestTy, + false), + FieldNo, DestTy); + return ConstantExpr::getNUWMul(MemberSize, N); + } + } + + // If there's no interesting folding happening, bail so that we don't create + // a constant that looks like it needs folding but really doesn't. + if (!Folded) + return 0; + + // Base case: Get a regular offsetof expression. + Constant *C = ConstantExpr::getOffsetOf(Ty, FieldNo); + C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false, + DestTy, false), + C, DestTy); + return C; +} -Constant *llvm::ConstantFoldCastInstruction(LLVMContext &Context, - unsigned opc, Constant *V, +Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V, const Type *DestTy) { if (isa<UndefValue>(V)) { // zext(undef) = 0, because the top bits will be zero. @@ -394,7 +579,7 @@ Constant *llvm::ConstantFoldCastInstruction(LLVMContext &Context, DestTy->isFP128Ty() ? APFloat::IEEEquad : APFloat::Bogus, APFloat::rmNearestTiesToEven, &ignored); - return ConstantFP::get(Context, Val); + return ConstantFP::get(V->getContext(), Val); } return 0; // Can't fold. case Instruction::FPToUI: @@ -407,7 +592,7 @@ Constant *llvm::ConstantFoldCastInstruction(LLVMContext &Context, (void) V.convertToInteger(x, DestBitWidth, opc==Instruction::FPToSI, APFloat::rmTowardZero, &ignored); APInt Val(DestBitWidth, 2, x); - return ConstantInt::get(Context, Val); + return ConstantInt::get(FPC->getContext(), Val); } return 0; // Can't fold. case Instruction::IntToPtr: //always treated as unsigned @@ -415,9 +600,49 @@ Constant *llvm::ConstantFoldCastInstruction(LLVMContext &Context, return ConstantPointerNull::get(cast<PointerType>(DestTy)); return 0; // Other pointer types cannot be casted case Instruction::PtrToInt: // always treated as unsigned - if (V->isNullValue()) // is it a null pointer value? + // Is it a null pointer value? + if (V->isNullValue()) return ConstantInt::get(DestTy, 0); - return 0; // Other pointer types cannot be casted + // If this is a sizeof-like expression, pull out multiplications by + // known factors to expose them to subsequent folding. If it's an + // alignof-like expression, factor out known factors. + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) + if (CE->getOpcode() == Instruction::GetElementPtr && + CE->getOperand(0)->isNullValue()) { + const Type *Ty = + cast<PointerType>(CE->getOperand(0)->getType())->getElementType(); + if (CE->getNumOperands() == 2) { + // Handle a sizeof-like expression. + Constant *Idx = CE->getOperand(1); + bool isOne = isa<ConstantInt>(Idx) && cast<ConstantInt>(Idx)->isOne(); + if (Constant *C = getFoldedSizeOf(Ty, DestTy, !isOne)) { + Idx = ConstantExpr::getCast(CastInst::getCastOpcode(Idx, true, + DestTy, false), + Idx, DestTy); + return ConstantExpr::getMul(C, Idx); + } + } else if (CE->getNumOperands() == 3 && + CE->getOperand(1)->isNullValue()) { + // Handle an alignof-like expression. 
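// The PtrToInt case above pattern-matches the canonical sizeof idiom:
//   ptrtoint(getelementptr(T* null, i32 1)) == sizeof(T)
// i.e. the address of element 1 of a T array based at null. Sketch of
// building that idiom directly; this is essentially what
// ConstantExpr::getSizeOf produces (the i64 result type is an assumption):
static Constant *sizeOfIdiom(const Type *Ty) {
  LLVMContext &Ctx = Ty->getContext();
  Constant *Null = Constant::getNullValue(PointerType::getUnqual(Ty));
  Constant *One  = ConstantInt::get(Type::getInt32Ty(Ctx), 1);
  Constant *GEP  = ConstantExpr::getGetElementPtr(Null, &One, 1);
  return ConstantExpr::getPtrToInt(GEP, Type::getInt64Ty(Ctx));
}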
+ if (const StructType *STy = dyn_cast<StructType>(Ty)) + if (!STy->isPacked()) { + ConstantInt *CI = cast<ConstantInt>(CE->getOperand(2)); + if (CI->isOne() && + STy->getNumElements() == 2 && + STy->getElementType(0)->isIntegerTy(1)) { + return getFoldedAlignOf(STy->getElementType(1), DestTy, false); + } + } + // Handle an offsetof-like expression. + if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)){ + if (Constant *C = getFoldedOffsetOf(Ty, CE->getOperand(2), + DestTy, false)) + return C; + } + } + } + // Other pointer types cannot be casted + return 0; case Instruction::UIToFP: case Instruction::SIToFP: if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { @@ -428,7 +653,7 @@ Constant *llvm::ConstantFoldCastInstruction(LLVMContext &Context, (void)apf.convertFromAPInt(api, opc==Instruction::SIToFP, APFloat::rmNearestTiesToEven); - return ConstantFP::get(Context, apf); + return ConstantFP::get(V->getContext(), apf); } return 0; case Instruction::ZExt: @@ -436,7 +661,7 @@ Constant *llvm::ConstantFoldCastInstruction(LLVMContext &Context, uint32_t BitWidth = cast<IntegerType>(DestTy)->getBitWidth(); APInt Result(CI->getValue()); Result.zext(BitWidth); - return ConstantInt::get(Context, Result); + return ConstantInt::get(V->getContext(), Result); } return 0; case Instruction::SExt: @@ -444,7 +669,7 @@ Constant *llvm::ConstantFoldCastInstruction(LLVMContext &Context, uint32_t BitWidth = cast<IntegerType>(DestTy)->getBitWidth(); APInt Result(CI->getValue()); Result.sext(BitWidth); - return ConstantInt::get(Context, Result); + return ConstantInt::get(V->getContext(), Result); } return 0; case Instruction::Trunc: { @@ -452,7 +677,7 @@ Constant *llvm::ConstantFoldCastInstruction(LLVMContext &Context, if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { APInt Result(CI->getValue()); Result.trunc(DestBitWidth); - return ConstantInt::get(Context, Result); + return ConstantInt::get(V->getContext(), Result); } // The input must be a constantexpr. See if we can simplify this based on @@ -466,12 +691,11 @@ Constant *llvm::ConstantFoldCastInstruction(LLVMContext &Context, return 0; } case Instruction::BitCast: - return FoldBitCast(Context, V, DestTy); + return FoldBitCast(V, DestTy); } } -Constant *llvm::ConstantFoldSelectInstruction(LLVMContext&, - Constant *Cond, +Constant *llvm::ConstantFoldSelectInstruction(Constant *Cond, Constant *V1, Constant *V2) { if (ConstantInt *CB = dyn_cast<ConstantInt>(Cond)) return CB->getZExtValue() ? V1 : V2; @@ -483,8 +707,7 @@ Constant *llvm::ConstantFoldSelectInstruction(LLVMContext&, return 0; } -Constant *llvm::ConstantFoldExtractElementInstruction(LLVMContext &Context, - Constant *Val, +Constant *llvm::ConstantFoldExtractElementInstruction(Constant *Val, Constant *Idx) { if (isa<UndefValue>(Val)) // ee(undef, x) -> undef return UndefValue::get(cast<VectorType>(Val->getType())->getElementType()); @@ -503,8 +726,7 @@ Constant *llvm::ConstantFoldExtractElementInstruction(LLVMContext &Context, return 0; } -Constant *llvm::ConstantFoldInsertElementInstruction(LLVMContext &Context, - Constant *Val, +Constant *llvm::ConstantFoldInsertElementInstruction(Constant *Val, Constant *Elt, Constant *Idx) { ConstantInt *CIdx = dyn_cast<ConstantInt>(Idx); @@ -563,8 +785,7 @@ Constant *llvm::ConstantFoldInsertElementInstruction(LLVMContext &Context, /// GetVectorElement - If C is a ConstantVector, ConstantAggregateZero or Undef /// return the specified element value. Otherwise return null. 
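// The alignof idiom decoded just above encodes alignof(T) as the offset of
// field 1 in the unpacked struct {i1, T}: the padding inserted after the i1
// is exactly T's alignment. The structural test, restated as a predicate:
static bool looksLikeAlignOfStruct(const StructType *STy) {
  return !STy->isPacked() &&
         STy->getNumElements() == 2 &&
         STy->getElementType(0)->isIntegerTy(1);   // the {i1, T} shape
}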
-static Constant *GetVectorElement(LLVMContext &Context, Constant *C, - unsigned EltNo) { +static Constant *GetVectorElement(Constant *C, unsigned EltNo) { if (ConstantVector *CV = dyn_cast<ConstantVector>(C)) return CV->getOperand(EltNo); @@ -576,8 +797,7 @@ static Constant *GetVectorElement(LLVMContext &Context, Constant *C, return 0; } -Constant *llvm::ConstantFoldShuffleVectorInstruction(LLVMContext &Context, - Constant *V1, +Constant *llvm::ConstantFoldShuffleVectorInstruction(Constant *V1, Constant *V2, Constant *Mask) { // Undefined shuffle mask -> undefined value. @@ -590,7 +810,7 @@ Constant *llvm::ConstantFoldShuffleVectorInstruction(LLVMContext &Context, // Loop over the shuffle mask, evaluating each element. SmallVector<Constant*, 32> Result; for (unsigned i = 0; i != MaskNumElts; ++i) { - Constant *InElt = GetVectorElement(Context, Mask, i); + Constant *InElt = GetVectorElement(Mask, i); if (InElt == 0) return 0; if (isa<UndefValue>(InElt)) @@ -600,9 +820,9 @@ Constant *llvm::ConstantFoldShuffleVectorInstruction(LLVMContext &Context, if (Elt >= SrcNumElts*2) InElt = UndefValue::get(EltTy); else if (Elt >= SrcNumElts) - InElt = GetVectorElement(Context, V2, Elt - SrcNumElts); + InElt = GetVectorElement(V2, Elt - SrcNumElts); else - InElt = GetVectorElement(Context, V1, Elt); + InElt = GetVectorElement(V1, Elt); if (InElt == 0) return 0; } else { // Unknown value. @@ -614,8 +834,7 @@ Constant *llvm::ConstantFoldShuffleVectorInstruction(LLVMContext &Context, return ConstantVector::get(&Result[0], Result.size()); } -Constant *llvm::ConstantFoldExtractValueInstruction(LLVMContext &Context, - Constant *Agg, +Constant *llvm::ConstantFoldExtractValueInstruction(Constant *Agg, const unsigned *Idxs, unsigned NumIdx) { // Base case: no indices, so return the entire value. @@ -635,19 +854,18 @@ Constant *llvm::ConstantFoldExtractValueInstruction(LLVMContext &Context, // Otherwise recurse. if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Agg)) - return ConstantFoldExtractValueInstruction(Context, CS->getOperand(*Idxs), + return ConstantFoldExtractValueInstruction(CS->getOperand(*Idxs), Idxs+1, NumIdx-1); if (ConstantArray *CA = dyn_cast<ConstantArray>(Agg)) - return ConstantFoldExtractValueInstruction(Context, CA->getOperand(*Idxs), + return ConstantFoldExtractValueInstruction(CA->getOperand(*Idxs), Idxs+1, NumIdx-1); ConstantVector *CV = cast<ConstantVector>(Agg); - return ConstantFoldExtractValueInstruction(Context, CV->getOperand(*Idxs), + return ConstantFoldExtractValueInstruction(CV->getOperand(*Idxs), Idxs+1, NumIdx-1); } -Constant *llvm::ConstantFoldInsertValueInstruction(LLVMContext &Context, - Constant *Agg, +Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val, const unsigned *Idxs, unsigned NumIdx) { @@ -667,6 +885,8 @@ Constant *llvm::ConstantFoldInsertValueInstruction(LLVMContext &Context, unsigned numOps; if (const ArrayType *AR = dyn_cast<ArrayType>(AggTy)) numOps = AR->getNumElements(); + else if (isa<UnionType>(AggTy)) + numOps = 1; else numOps = cast<StructType>(AggTy)->getNumElements(); @@ -675,14 +895,18 @@ Constant *llvm::ConstantFoldInsertValueInstruction(LLVMContext &Context, const Type *MemberTy = AggTy->getTypeAtIndex(i); Constant *Op = (*Idxs == i) ? 
- ConstantFoldInsertValueInstruction(Context, UndefValue::get(MemberTy), + ConstantFoldInsertValueInstruction(UndefValue::get(MemberTy), Val, Idxs+1, NumIdx-1) : UndefValue::get(MemberTy); Ops[i] = Op; } if (const StructType* ST = dyn_cast<StructType>(AggTy)) - return ConstantStruct::get(Context, Ops, ST->isPacked()); + return ConstantStruct::get(ST->getContext(), Ops, ST->isPacked()); + if (const UnionType* UT = dyn_cast<UnionType>(AggTy)) { + assert(Ops.size() == 1 && "Union can only contain a single value!"); + return ConstantUnion::get(UT, Ops[0]); + } return ConstantArray::get(cast<ArrayType>(AggTy), Ops); } @@ -706,15 +930,14 @@ Constant *llvm::ConstantFoldInsertValueInstruction(LLVMContext &Context, const Type *MemberTy = AggTy->getTypeAtIndex(i); Constant *Op = (*Idxs == i) ? - ConstantFoldInsertValueInstruction(Context, - Constant::getNullValue(MemberTy), + ConstantFoldInsertValueInstruction(Constant::getNullValue(MemberTy), Val, Idxs+1, NumIdx-1) : Constant::getNullValue(MemberTy); Ops[i] = Op; } - if (const StructType* ST = dyn_cast<StructType>(AggTy)) - return ConstantStruct::get(Context, Ops, ST->isPacked()); + if (const StructType *ST = dyn_cast<StructType>(AggTy)) + return ConstantStruct::get(ST->getContext(), Ops, ST->isPacked()); return ConstantArray::get(cast<ArrayType>(AggTy), Ops); } @@ -724,13 +947,12 @@ Constant *llvm::ConstantFoldInsertValueInstruction(LLVMContext &Context, for (unsigned i = 0; i < Agg->getNumOperands(); ++i) { Constant *Op = cast<Constant>(Agg->getOperand(i)); if (*Idxs == i) - Op = ConstantFoldInsertValueInstruction(Context, Op, - Val, Idxs+1, NumIdx-1); + Op = ConstantFoldInsertValueInstruction(Op, Val, Idxs+1, NumIdx-1); Ops[i] = Op; } if (const StructType* ST = dyn_cast<StructType>(Agg->getType())) - return ConstantStruct::get(Context, Ops, ST->isPacked()); + return ConstantStruct::get(ST->getContext(), Ops, ST->isPacked()); return ConstantArray::get(cast<ArrayType>(Agg->getType()), Ops); } @@ -738,8 +960,7 @@ Constant *llvm::ConstantFoldInsertValueInstruction(LLVMContext &Context, } -Constant *llvm::ConstantFoldBinaryInstruction(LLVMContext &Context, - unsigned Opcode, +Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1, Constant *C2) { // No compile-time operations on this type yet. 
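// insertvalue into a union (above) always yields a one-operand ConstantUnion;
// the ConstantUnion::get factory added later in this patch additionally
// canonicalizes an all-zero member back to ConstantAggregateZero. Minimal
// construction sketch mirroring those rules:
static Constant *makeUnionConstant(const UnionType *UT, Constant *Member) {
  assert(UT->getElementTypeIndex(Member->getType()) >= 0 &&
         "initializer is not a member type of this union");
  return ConstantUnion::get(UT, Member);  // one operand, or zeroinitializer
}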
if (C1->getType()->isPPC_FP128Ty()) @@ -896,51 +1117,51 @@ Constant *llvm::ConstantFoldBinaryInstruction(LLVMContext &Context, default: break; case Instruction::Add: - return ConstantInt::get(Context, C1V + C2V); + return ConstantInt::get(CI1->getContext(), C1V + C2V); case Instruction::Sub: - return ConstantInt::get(Context, C1V - C2V); + return ConstantInt::get(CI1->getContext(), C1V - C2V); case Instruction::Mul: - return ConstantInt::get(Context, C1V * C2V); + return ConstantInt::get(CI1->getContext(), C1V * C2V); case Instruction::UDiv: assert(!CI2->isNullValue() && "Div by zero handled above"); - return ConstantInt::get(Context, C1V.udiv(C2V)); + return ConstantInt::get(CI1->getContext(), C1V.udiv(C2V)); case Instruction::SDiv: assert(!CI2->isNullValue() && "Div by zero handled above"); if (C2V.isAllOnesValue() && C1V.isMinSignedValue()) return UndefValue::get(CI1->getType()); // MIN_INT / -1 -> undef - return ConstantInt::get(Context, C1V.sdiv(C2V)); + return ConstantInt::get(CI1->getContext(), C1V.sdiv(C2V)); case Instruction::URem: assert(!CI2->isNullValue() && "Div by zero handled above"); - return ConstantInt::get(Context, C1V.urem(C2V)); + return ConstantInt::get(CI1->getContext(), C1V.urem(C2V)); case Instruction::SRem: assert(!CI2->isNullValue() && "Div by zero handled above"); if (C2V.isAllOnesValue() && C1V.isMinSignedValue()) return UndefValue::get(CI1->getType()); // MIN_INT % -1 -> undef - return ConstantInt::get(Context, C1V.srem(C2V)); + return ConstantInt::get(CI1->getContext(), C1V.srem(C2V)); case Instruction::And: - return ConstantInt::get(Context, C1V & C2V); + return ConstantInt::get(CI1->getContext(), C1V & C2V); case Instruction::Or: - return ConstantInt::get(Context, C1V | C2V); + return ConstantInt::get(CI1->getContext(), C1V | C2V); case Instruction::Xor: - return ConstantInt::get(Context, C1V ^ C2V); + return ConstantInt::get(CI1->getContext(), C1V ^ C2V); case Instruction::Shl: { uint32_t shiftAmt = C2V.getZExtValue(); if (shiftAmt < C1V.getBitWidth()) - return ConstantInt::get(Context, C1V.shl(shiftAmt)); + return ConstantInt::get(CI1->getContext(), C1V.shl(shiftAmt)); else return UndefValue::get(C1->getType()); // too big shift is undef } case Instruction::LShr: { uint32_t shiftAmt = C2V.getZExtValue(); if (shiftAmt < C1V.getBitWidth()) - return ConstantInt::get(Context, C1V.lshr(shiftAmt)); + return ConstantInt::get(CI1->getContext(), C1V.lshr(shiftAmt)); else return UndefValue::get(C1->getType()); // too big shift is undef } case Instruction::AShr: { uint32_t shiftAmt = C2V.getZExtValue(); if (shiftAmt < C1V.getBitWidth()) - return ConstantInt::get(Context, C1V.ashr(shiftAmt)); + return ConstantInt::get(CI1->getContext(), C1V.ashr(shiftAmt)); else return UndefValue::get(C1->getType()); // too big shift is undef } @@ -970,19 +1191,19 @@ Constant *llvm::ConstantFoldBinaryInstruction(LLVMContext &Context, break; case Instruction::FAdd: (void)C3V.add(C2V, APFloat::rmNearestTiesToEven); - return ConstantFP::get(Context, C3V); + return ConstantFP::get(C1->getContext(), C3V); case Instruction::FSub: (void)C3V.subtract(C2V, APFloat::rmNearestTiesToEven); - return ConstantFP::get(Context, C3V); + return ConstantFP::get(C1->getContext(), C3V); case Instruction::FMul: (void)C3V.multiply(C2V, APFloat::rmNearestTiesToEven); - return ConstantFP::get(Context, C3V); + return ConstantFP::get(C1->getContext(), C3V); case Instruction::FDiv: (void)C3V.divide(C2V, APFloat::rmNearestTiesToEven); - return ConstantFP::get(Context, C3V); + return 
ConstantFP::get(C1->getContext(), C3V); case Instruction::FRem: (void)C3V.mod(C2V, APFloat::rmNearestTiesToEven); - return ConstantFP::get(Context, C3V); + return ConstantFP::get(C1->getContext(), C3V); } } } else if (const VectorType *VTy = dyn_cast<VectorType>(C1->getType())) { @@ -1127,10 +1348,19 @@ Constant *llvm::ConstantFoldBinaryInstruction(LLVMContext &Context, } } - if (isa<ConstantExpr>(C1)) { + if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) { // There are many possible foldings we could do here. We should probably // at least fold add of a pointer with an integer into the appropriate // getelementptr. This will improve alias analysis a bit. + + // Given ((a + b) + c), if (b + c) folds to something interesting, return + // (a + (b + c)). + if (Instruction::isAssociative(Opcode, C1->getType()) && + CE1->getOpcode() == Opcode) { + Constant *T = ConstantExpr::get(Opcode, CE1->getOperand(1), C2); + if (!isa<ConstantExpr>(T) || cast<ConstantExpr>(T)->getOpcode() != Opcode) + return ConstantExpr::get(Opcode, CE1->getOperand(0), T); + } } else if (isa<ConstantExpr>(C2)) { // If C2 is a constant expr and C1 isn't, flop them around and fold the // other way if possible. @@ -1143,7 +1373,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(LLVMContext &Context, case Instruction::Or: case Instruction::Xor: // No change of opcode required. - return ConstantFoldBinaryInstruction(Context, Opcode, C2, C1); + return ConstantFoldBinaryInstruction(Opcode, C2, C1); case Instruction::Shl: case Instruction::LShr: @@ -1162,7 +1392,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(LLVMContext &Context, } // i1 can be simplified in many cases. - if (C1->getType()->isInteger(1)) { + if (C1->getType()->isIntegerTy(1)) { switch (Opcode) { case Instruction::Add: case Instruction::Sub: @@ -1184,7 +1414,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(LLVMContext &Context, case Instruction::SRem: // We can assume that C2 == 1. If it were zero the result would be // undefined through division by zero. - return ConstantInt::getFalse(Context); + return ConstantInt::getFalse(C1->getContext()); default: break; } @@ -1218,8 +1448,7 @@ static bool isMaybeZeroSizedType(const Type *Ty) { /// first is less than the second, return -1, if the second is less than the /// first, return 1. If the constants are not integral, return -2. /// -static int IdxCompare(LLVMContext &Context, Constant *C1, Constant *C2, - const Type *ElTy) { +static int IdxCompare(Constant *C1, Constant *C2, const Type *ElTy) { if (C1 == C2) return 0; // Ok, we found a different index. If they are not ConstantInt, we can't do @@ -1229,11 +1458,11 @@ static int IdxCompare(LLVMContext &Context, Constant *C1, Constant *C2, // Ok, we have two differing integer indices. Sign extend them to be the same // type. Long is always big enough, so we use it. - if (!C1->getType()->isInteger(64)) - C1 = ConstantExpr::getSExt(C1, Type::getInt64Ty(Context)); + if (!C1->getType()->isIntegerTy(64)) + C1 = ConstantExpr::getSExt(C1, Type::getInt64Ty(C1->getContext())); - if (!C2->getType()->isInteger(64)) - C2 = ConstantExpr::getSExt(C2, Type::getInt64Ty(Context)); + if (!C2->getType()->isIntegerTy(64)) + C2 = ConstantExpr::getSExt(C2, Type::getInt64Ty(C1->getContext())); if (C1 == C2) return 0; // They are equal @@ -1262,8 +1491,7 @@ static int IdxCompare(LLVMContext &Context, Constant *C1, Constant *C2, /// To simplify this code we canonicalize the relation so that the first /// operand is always the most "complex" of the two. 
We consider ConstantFP /// to be the simplest, and ConstantExprs to be the most complex. -static FCmpInst::Predicate evaluateFCmpRelation(LLVMContext &Context, - Constant *V1, Constant *V2) { +static FCmpInst::Predicate evaluateFCmpRelation(Constant *V1, Constant *V2) { assert(V1->getType() == V2->getType() && "Cannot compare values of different types!"); @@ -1296,7 +1524,7 @@ static FCmpInst::Predicate evaluateFCmpRelation(LLVMContext &Context, } // If the first operand is simple and second is ConstantExpr, swap operands. - FCmpInst::Predicate SwappedRelation = evaluateFCmpRelation(Context, V2, V1); + FCmpInst::Predicate SwappedRelation = evaluateFCmpRelation(V2, V1); if (SwappedRelation != FCmpInst::BAD_FCMP_PREDICATE) return FCmpInst::getSwappedPredicate(SwappedRelation); } else { @@ -1331,16 +1559,16 @@ static FCmpInst::Predicate evaluateFCmpRelation(LLVMContext &Context, /// constants (like ConstantInt) to be the simplest, followed by /// GlobalValues, followed by ConstantExpr's (the most complex). /// -static ICmpInst::Predicate evaluateICmpRelation(LLVMContext &Context, - Constant *V1, - Constant *V2, +static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2, bool isSigned) { assert(V1->getType() == V2->getType() && "Cannot compare different types of values!"); if (V1 == V2) return ICmpInst::ICMP_EQ; - if (!isa<ConstantExpr>(V1) && !isa<GlobalValue>(V1)) { - if (!isa<GlobalValue>(V2) && !isa<ConstantExpr>(V2)) { + if (!isa<ConstantExpr>(V1) && !isa<GlobalValue>(V1) && + !isa<BlockAddress>(V1)) { + if (!isa<GlobalValue>(V2) && !isa<ConstantExpr>(V2) && + !isa<BlockAddress>(V2)) { // We distilled this down to a simple case, use the standard constant // folder. ConstantInt *R = 0; @@ -1363,36 +1591,63 @@ static ICmpInst::Predicate evaluateICmpRelation(LLVMContext &Context, // If the first operand is simple, swap operands. ICmpInst::Predicate SwappedRelation = - evaluateICmpRelation(Context, V2, V1, isSigned); + evaluateICmpRelation(V2, V1, isSigned); if (SwappedRelation != ICmpInst::BAD_ICMP_PREDICATE) return ICmpInst::getSwappedPredicate(SwappedRelation); - } else if (const GlobalValue *CPR1 = dyn_cast<GlobalValue>(V1)) { + } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(V1)) { if (isa<ConstantExpr>(V2)) { // Swap as necessary. ICmpInst::Predicate SwappedRelation = - evaluateICmpRelation(Context, V2, V1, isSigned); + evaluateICmpRelation(V2, V1, isSigned); if (SwappedRelation != ICmpInst::BAD_ICMP_PREDICATE) return ICmpInst::getSwappedPredicate(SwappedRelation); - else - return ICmpInst::BAD_ICMP_PREDICATE; + return ICmpInst::BAD_ICMP_PREDICATE; } - // Now we know that the RHS is a GlobalValue or simple constant, - // which (since the types must match) means that it's a ConstantPointerNull. - if (const GlobalValue *CPR2 = dyn_cast<GlobalValue>(V2)) { + // Now we know that the RHS is a GlobalValue, BlockAddress or simple + // constant (which, since the types must match, means that it's a + // ConstantPointerNull). + if (const GlobalValue *GV2 = dyn_cast<GlobalValue>(V2)) { // Don't try to decide equality of aliases. - if (!isa<GlobalAlias>(CPR1) && !isa<GlobalAlias>(CPR2)) - if (!CPR1->hasExternalWeakLinkage() || !CPR2->hasExternalWeakLinkage()) + if (!isa<GlobalAlias>(GV) && !isa<GlobalAlias>(GV2)) + if (!GV->hasExternalWeakLinkage() || !GV2->hasExternalWeakLinkage()) return ICmpInst::ICMP_NE; + } else if (isa<BlockAddress>(V2)) { + return ICmpInst::ICMP_NE; // Globals never equal labels. 
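// The BlockAddress cases woven through evaluateICmpRelation here reduce to
// three facts: labels are never null, labels never equal globals, and block
// addresses from different functions are distinct (two labels in the same
// function may coincide if blocks get merged, so that case stays unknown).
// Restated compactly; the full case analysis continues below:
static ICmpInst::Predicate relateBlockAddr(const BlockAddress *BA,
                                           const Constant *RHS) {
  if (const BlockAddress *BA2 = dyn_cast<BlockAddress>(RHS))
    return BA2->getFunction() != BA->getFunction()
               ? ICmpInst::ICMP_NE                 // distinct functions
               : ICmpInst::BAD_ICMP_PREDICATE;     // possibly the same block
  return ICmpInst::ICMP_NE;                        // vs null or a global
}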
} else { assert(isa<ConstantPointerNull>(V2) && "Canonicalization guarantee!"); - // GlobalVals can never be null. Don't try to evaluate aliases. - if (!CPR1->hasExternalWeakLinkage() && !isa<GlobalAlias>(CPR1)) + // GlobalVals can never be null unless they have external weak linkage. + // We don't try to evaluate aliases here. + if (!GV->hasExternalWeakLinkage() && !isa<GlobalAlias>(GV)) return ICmpInst::ICMP_NE; } + } else if (const BlockAddress *BA = dyn_cast<BlockAddress>(V1)) { + if (isa<ConstantExpr>(V2)) { // Swap as necessary. + ICmpInst::Predicate SwappedRelation = + evaluateICmpRelation(V2, V1, isSigned); + if (SwappedRelation != ICmpInst::BAD_ICMP_PREDICATE) + return ICmpInst::getSwappedPredicate(SwappedRelation); + return ICmpInst::BAD_ICMP_PREDICATE; + } + + // Now we know that the RHS is a GlobalValue, BlockAddress or simple + // constant (which, since the types must match, means that it is a + // ConstantPointerNull). + if (const BlockAddress *BA2 = dyn_cast<BlockAddress>(V2)) { + // Block address in another function can't equal this one, but block + // addresses in the current function might be the same if blocks are + // empty. + if (BA2->getFunction() != BA->getFunction()) + return ICmpInst::ICMP_NE; + } else { + // Block addresses aren't null, don't equal the address of globals. + assert((isa<ConstantPointerNull>(V2) || isa<GlobalValue>(V2)) && + "Canonicalization guarantee!"); + return ICmpInst::ICMP_NE; + } } else { // Ok, the LHS is known to be a constantexpr. The RHS can be any of a - // constantexpr, a CPR, or a simple constant. + // constantexpr, a global, block address, or a simple constant. ConstantExpr *CE1 = cast<ConstantExpr>(V1); Constant *CE1Op0 = CE1->getOperand(0); @@ -1412,10 +1667,10 @@ static ICmpInst::Predicate evaluateICmpRelation(LLVMContext &Context, // If the cast is not actually changing bits, and the second operand is a // null pointer, do the comparison with the pre-casted value. if (V2->isNullValue() && - (isa<PointerType>(CE1->getType()) || CE1->getType()->isInteger())) { + (isa<PointerType>(CE1->getType()) || CE1->getType()->isIntegerTy())) { if (CE1->getOpcode() == Instruction::ZExt) isSigned = false; if (CE1->getOpcode() == Instruction::SExt) isSigned = true; - return evaluateICmpRelation(Context, CE1Op0, + return evaluateICmpRelation(CE1Op0, Constant::getNullValue(CE1Op0->getType()), isSigned); } @@ -1447,9 +1702,9 @@ static ICmpInst::Predicate evaluateICmpRelation(LLVMContext &Context, return ICmpInst::ICMP_EQ; } // Otherwise, we can't really say if the first operand is null or not. - } else if (const GlobalValue *CPR2 = dyn_cast<GlobalValue>(V2)) { + } else if (const GlobalValue *GV2 = dyn_cast<GlobalValue>(V2)) { if (isa<ConstantPointerNull>(CE1Op0)) { - if (CPR2->hasExternalWeakLinkage()) + if (GV2->hasExternalWeakLinkage()) // Weak linkage GVals could be zero or not. We're comparing it to // a null pointer, so its less-or-equal return isSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; @@ -1457,8 +1712,8 @@ static ICmpInst::Predicate evaluateICmpRelation(LLVMContext &Context, // If its not weak linkage, the GVal must have a non-zero address // so the result is less-than return isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; - } else if (const GlobalValue *CPR1 = dyn_cast<GlobalValue>(CE1Op0)) { - if (CPR1 == CPR2) { + } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(CE1Op0)) { + if (GV == GV2) { // If this is a getelementptr of the same global, then it must be // different. 
Because the types must match, the getelementptr could // only have at most one index, and because we fold getelementptr's @@ -1504,7 +1759,7 @@ static ICmpInst::Predicate evaluateICmpRelation(LLVMContext &Context, gep_type_iterator GTI = gep_type_begin(CE1); for (;i != CE1->getNumOperands() && i != CE2->getNumOperands(); ++i, ++GTI) - switch (IdxCompare(Context, CE1->getOperand(i), + switch (IdxCompare(CE1->getOperand(i), CE2->getOperand(i), GTI.getIndexedType())) { case -1: return isSigned ? ICmpInst::ICMP_SLT:ICmpInst::ICMP_ULT; case 1: return isSigned ? ICmpInst::ICMP_SGT:ICmpInst::ICMP_UGT; @@ -1540,14 +1795,14 @@ static ICmpInst::Predicate evaluateICmpRelation(LLVMContext &Context, return ICmpInst::BAD_ICMP_PREDICATE; } -Constant *llvm::ConstantFoldCompareInstruction(LLVMContext &Context, - unsigned short pred, +Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, Constant *C1, Constant *C2) { const Type *ResultTy; if (const VectorType *VT = dyn_cast<VectorType>(C1->getType())) - ResultTy = VectorType::get(Type::getInt1Ty(Context), VT->getNumElements()); + ResultTy = VectorType::get(Type::getInt1Ty(C1->getContext()), + VT->getNumElements()); else - ResultTy = Type::getInt1Ty(Context); + ResultTy = Type::getInt1Ty(C1->getContext()); // Fold FCMP_FALSE/FCMP_TRUE unconditionally. if (pred == FCmpInst::FCMP_FALSE) @@ -1570,9 +1825,9 @@ Constant *llvm::ConstantFoldCompareInstruction(LLVMContext &Context, // Don't try to evaluate aliases. External weak GV can be null. if (!isa<GlobalAlias>(GV) && !GV->hasExternalWeakLinkage()) { if (pred == ICmpInst::ICMP_EQ) - return ConstantInt::getFalse(Context); + return ConstantInt::getFalse(C1->getContext()); else if (pred == ICmpInst::ICMP_NE) - return ConstantInt::getTrue(Context); + return ConstantInt::getTrue(C1->getContext()); } // icmp eq/ne(GV,null) -> false/true } else if (C2->isNullValue()) { @@ -1580,14 +1835,14 @@ Constant *llvm::ConstantFoldCompareInstruction(LLVMContext &Context, // Don't try to evaluate aliases. External weak GV can be null. if (!isa<GlobalAlias>(GV) && !GV->hasExternalWeakLinkage()) { if (pred == ICmpInst::ICMP_EQ) - return ConstantInt::getFalse(Context); + return ConstantInt::getFalse(C1->getContext()); else if (pred == ICmpInst::ICMP_NE) - return ConstantInt::getTrue(Context); + return ConstantInt::getTrue(C1->getContext()); } } // If the comparison is a comparison between two i1's, simplify it. 
- if (C1->getType()->isInteger(1)) { + if (C1->getType()->isIntegerTy(1)) { switch(pred) { case ICmpInst::ICMP_EQ: if (isa<ConstantInt>(C2)) @@ -1605,26 +1860,16 @@ Constant *llvm::ConstantFoldCompareInstruction(LLVMContext &Context, APInt V2 = cast<ConstantInt>(C2)->getValue(); switch (pred) { default: llvm_unreachable("Invalid ICmp Predicate"); return 0; - case ICmpInst::ICMP_EQ: - return ConstantInt::get(Type::getInt1Ty(Context), V1 == V2); - case ICmpInst::ICMP_NE: - return ConstantInt::get(Type::getInt1Ty(Context), V1 != V2); - case ICmpInst::ICMP_SLT: - return ConstantInt::get(Type::getInt1Ty(Context), V1.slt(V2)); - case ICmpInst::ICMP_SGT: - return ConstantInt::get(Type::getInt1Ty(Context), V1.sgt(V2)); - case ICmpInst::ICMP_SLE: - return ConstantInt::get(Type::getInt1Ty(Context), V1.sle(V2)); - case ICmpInst::ICMP_SGE: - return ConstantInt::get(Type::getInt1Ty(Context), V1.sge(V2)); - case ICmpInst::ICMP_ULT: - return ConstantInt::get(Type::getInt1Ty(Context), V1.ult(V2)); - case ICmpInst::ICMP_UGT: - return ConstantInt::get(Type::getInt1Ty(Context), V1.ugt(V2)); - case ICmpInst::ICMP_ULE: - return ConstantInt::get(Type::getInt1Ty(Context), V1.ule(V2)); - case ICmpInst::ICMP_UGE: - return ConstantInt::get(Type::getInt1Ty(Context), V1.uge(V2)); + case ICmpInst::ICMP_EQ: return ConstantInt::get(ResultTy, V1 == V2); + case ICmpInst::ICMP_NE: return ConstantInt::get(ResultTy, V1 != V2); + case ICmpInst::ICMP_SLT: return ConstantInt::get(ResultTy, V1.slt(V2)); + case ICmpInst::ICMP_SGT: return ConstantInt::get(ResultTy, V1.sgt(V2)); + case ICmpInst::ICMP_SLE: return ConstantInt::get(ResultTy, V1.sle(V2)); + case ICmpInst::ICMP_SGE: return ConstantInt::get(ResultTy, V1.sge(V2)); + case ICmpInst::ICMP_ULT: return ConstantInt::get(ResultTy, V1.ult(V2)); + case ICmpInst::ICMP_UGT: return ConstantInt::get(ResultTy, V1.ugt(V2)); + case ICmpInst::ICMP_ULE: return ConstantInt::get(ResultTy, V1.ule(V2)); + case ICmpInst::ICMP_UGE: return ConstantInt::get(ResultTy, V1.uge(V2)); } } else if (isa<ConstantFP>(C1) && isa<ConstantFP>(C2)) { APFloat C1V = cast<ConstantFP>(C1)->getValueAPF(); @@ -1632,47 +1877,47 @@ Constant *llvm::ConstantFoldCompareInstruction(LLVMContext &Context, APFloat::cmpResult R = C1V.compare(C2V); switch (pred) { default: llvm_unreachable("Invalid FCmp Predicate"); return 0; - case FCmpInst::FCMP_FALSE: return ConstantInt::getFalse(Context); - case FCmpInst::FCMP_TRUE: return ConstantInt::getTrue(Context); + case FCmpInst::FCMP_FALSE: return Constant::getNullValue(ResultTy); + case FCmpInst::FCMP_TRUE: return Constant::getAllOnesValue(ResultTy); case FCmpInst::FCMP_UNO: - return ConstantInt::get(Type::getInt1Ty(Context), R==APFloat::cmpUnordered); + return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered); case FCmpInst::FCMP_ORD: - return ConstantInt::get(Type::getInt1Ty(Context), R!=APFloat::cmpUnordered); + return ConstantInt::get(ResultTy, R!=APFloat::cmpUnordered); case FCmpInst::FCMP_UEQ: - return ConstantInt::get(Type::getInt1Ty(Context), R==APFloat::cmpUnordered || - R==APFloat::cmpEqual); + return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered || + R==APFloat::cmpEqual); case FCmpInst::FCMP_OEQ: - return ConstantInt::get(Type::getInt1Ty(Context), R==APFloat::cmpEqual); + return ConstantInt::get(ResultTy, R==APFloat::cmpEqual); case FCmpInst::FCMP_UNE: - return ConstantInt::get(Type::getInt1Ty(Context), R!=APFloat::cmpEqual); + return ConstantInt::get(ResultTy, R!=APFloat::cmpEqual); case FCmpInst::FCMP_ONE: - return ConstantInt::get(Type::getInt1Ty(Context), 
R==APFloat::cmpLessThan || - R==APFloat::cmpGreaterThan); + return ConstantInt::get(ResultTy, R==APFloat::cmpLessThan || + R==APFloat::cmpGreaterThan); case FCmpInst::FCMP_ULT: - return ConstantInt::get(Type::getInt1Ty(Context), R==APFloat::cmpUnordered || - R==APFloat::cmpLessThan); + return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered || + R==APFloat::cmpLessThan); case FCmpInst::FCMP_OLT: - return ConstantInt::get(Type::getInt1Ty(Context), R==APFloat::cmpLessThan); + return ConstantInt::get(ResultTy, R==APFloat::cmpLessThan); case FCmpInst::FCMP_UGT: - return ConstantInt::get(Type::getInt1Ty(Context), R==APFloat::cmpUnordered || - R==APFloat::cmpGreaterThan); + return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered || + R==APFloat::cmpGreaterThan); case FCmpInst::FCMP_OGT: - return ConstantInt::get(Type::getInt1Ty(Context), R==APFloat::cmpGreaterThan); + return ConstantInt::get(ResultTy, R==APFloat::cmpGreaterThan); case FCmpInst::FCMP_ULE: - return ConstantInt::get(Type::getInt1Ty(Context), R!=APFloat::cmpGreaterThan); + return ConstantInt::get(ResultTy, R!=APFloat::cmpGreaterThan); case FCmpInst::FCMP_OLE: - return ConstantInt::get(Type::getInt1Ty(Context), R==APFloat::cmpLessThan || - R==APFloat::cmpEqual); + return ConstantInt::get(ResultTy, R==APFloat::cmpLessThan || + R==APFloat::cmpEqual); case FCmpInst::FCMP_UGE: - return ConstantInt::get(Type::getInt1Ty(Context), R!=APFloat::cmpLessThan); + return ConstantInt::get(ResultTy, R!=APFloat::cmpLessThan); case FCmpInst::FCMP_OGE: - return ConstantInt::get(Type::getInt1Ty(Context), R==APFloat::cmpGreaterThan || - R==APFloat::cmpEqual); + return ConstantInt::get(ResultTy, R==APFloat::cmpGreaterThan || + R==APFloat::cmpEqual); } } else if (isa<VectorType>(C1->getType())) { SmallVector<Constant*, 16> C1Elts, C2Elts; - C1->getVectorElements(Context, C1Elts); - C2->getVectorElements(Context, C2Elts); + C1->getVectorElements(C1Elts); + C2->getVectorElements(C2Elts); if (C1Elts.empty() || C2Elts.empty()) return 0; @@ -1686,9 +1931,9 @@ Constant *llvm::ConstantFoldCompareInstruction(LLVMContext &Context, return ConstantVector::get(&ResElts[0], ResElts.size()); } - if (C1->getType()->isFloatingPoint()) { + if (C1->getType()->isFloatingPointTy()) { int Result = -1; // -1 = unknown, 0 = known false, 1 = known true. - switch (evaluateFCmpRelation(Context, C1, C2)) { + switch (evaluateFCmpRelation(C1, C2)) { default: llvm_unreachable("Unknown relation!"); case FCmpInst::FCMP_UNO: case FCmpInst::FCMP_ORD: @@ -1742,12 +1987,12 @@ Constant *llvm::ConstantFoldCompareInstruction(LLVMContext &Context, // If we evaluated the result, return it now. if (Result != -1) - return ConstantInt::get(Type::getInt1Ty(Context), Result); + return ConstantInt::get(ResultTy, Result); } else { // Evaluate the relation between the two constants, per the predicate. int Result = -1; // -1 = unknown, 0 = known false, 1 = known true. - switch (evaluateICmpRelation(Context, C1, C2, CmpInst::isSigned(pred))) { + switch (evaluateICmpRelation(C1, C2, CmpInst::isSigned(pred))) { default: llvm_unreachable("Unknown relational!"); case ICmpInst::BAD_ICMP_PREDICATE: break; // Couldn't determine anything about these constants. @@ -1812,13 +2057,15 @@ Constant *llvm::ConstantFoldCompareInstruction(LLVMContext &Context, // If we evaluated the result, return it now. 
if (Result != -1) - return ConstantInt::get(Type::getInt1Ty(Context), Result); + return ConstantInt::get(ResultTy, Result); // If the right hand side is a bitcast, try using its inverse to simplify - // it by moving it to the left hand side. + // it by moving it to the left hand side. We can't do this if it would turn + // a vector compare into a scalar compare or visa versa. if (ConstantExpr *CE2 = dyn_cast<ConstantExpr>(C2)) { - if (CE2->getOpcode() == Instruction::BitCast) { - Constant *CE2Op0 = CE2->getOperand(0); + Constant *CE2Op0 = CE2->getOperand(0); + if (CE2->getOpcode() == Instruction::BitCast && + isa<VectorType>(CE2->getType())==isa<VectorType>(CE2Op0->getType())) { Constant *Inverse = ConstantExpr::getBitCast(C1, CE2Op0->getType()); return ConstantExpr::getICmp(pred, Inverse, CE2Op0); } @@ -1890,8 +2137,7 @@ static bool isInBoundsIndices(Constant *const *Idxs, size_t NumIdx) { return true; } -Constant *llvm::ConstantFoldGetElementPtr(LLVMContext &Context, - Constant *C, +Constant *llvm::ConstantFoldGetElementPtr(Constant *C, bool inBounds, Constant* const *Idxs, unsigned NumIdx) { @@ -1951,10 +2197,9 @@ Constant *llvm::ConstantFoldGetElementPtr(LLVMContext &Context, if (!Idx0->isNullValue()) { const Type *IdxTy = Combined->getType(); if (IdxTy != Idx0->getType()) { - Constant *C1 = - ConstantExpr::getSExtOrBitCast(Idx0, Type::getInt64Ty(Context)); - Constant *C2 = ConstantExpr::getSExtOrBitCast(Combined, - Type::getInt64Ty(Context)); + const Type *Int64Ty = Type::getInt64Ty(IdxTy->getContext()); + Constant *C1 = ConstantExpr::getSExtOrBitCast(Idx0, Int64Ty); + Constant *C2 = ConstantExpr::getSExtOrBitCast(Combined, Int64Ty); Combined = ConstantExpr::get(Instruction::Add, C1, C2); } else { Combined = @@ -1975,7 +2220,7 @@ Constant *llvm::ConstantFoldGetElementPtr(LLVMContext &Context, } // Implement folding of: - // int* getelementptr ([2 x int]* cast ([3 x int]* %X to [2 x int]*), + // int* getelementptr ([2 x int]* bitcast ([3 x int]* %X to [2 x int]*), // long 0, long 0) // To: int* getelementptr ([3 x int]* %X, long 0, long 0) // @@ -1992,28 +2237,6 @@ Constant *llvm::ConstantFoldGetElementPtr(LLVMContext &Context, ConstantExpr::getGetElementPtr( (Constant*)CE->getOperand(0), Idxs, NumIdx); } - - // Fold: getelementptr (i8* inttoptr (i64 1 to i8*), i32 -1) - // Into: inttoptr (i64 0 to i8*) - // This happens with pointers to member functions in C++. - if (CE->getOpcode() == Instruction::IntToPtr && NumIdx == 1 && - isa<ConstantInt>(CE->getOperand(0)) && isa<ConstantInt>(Idxs[0]) && - cast<PointerType>(CE->getType())->getElementType() == - Type::getInt8Ty(Context)) { - Constant *Base = CE->getOperand(0); - Constant *Offset = Idxs[0]; - - // Convert the smaller integer to the larger type. - if (Offset->getType()->getPrimitiveSizeInBits() < - Base->getType()->getPrimitiveSizeInBits()) - Offset = ConstantExpr::getSExt(Offset, Base->getType()); - else if (Base->getType()->getPrimitiveSizeInBits() < - Offset->getType()->getPrimitiveSizeInBits()) - Base = ConstantExpr::getZExt(Base, Offset->getType()); - - Base = ConstantExpr::getAdd(Base, Offset); - return ConstantExpr::getIntToPtr(Base, CE->getType()); - } } // Check to see if any array indices are not within the corresponding @@ -2043,12 +2266,12 @@ Constant *llvm::ConstantFoldGetElementPtr(LLVMContext &Context, // Before adding, extend both operands to i64 to avoid // overflow trouble. 
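// Earlier in this hunk, moving a bitcast across the compare is allowed only
// when it preserves the compare's shape: both operands stay scalars or both
// stay vectors (otherwise the fold would change the result from i1 to
// <N x i1> or back). That guard as a reusable predicate:
static bool sameCompareShape(const Type *A, const Type *B) {
  return isa<VectorType>(A) == isa<VectorType>(B);
}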
- if (!PrevIdx->getType()->isInteger(64)) + if (!PrevIdx->getType()->isIntegerTy(64)) PrevIdx = ConstantExpr::getSExt(PrevIdx, - Type::getInt64Ty(Context)); - if (!Div->getType()->isInteger(64)) + Type::getInt64Ty(Div->getContext())); + if (!Div->getType()->isIntegerTy(64)) Div = ConstantExpr::getSExt(Div, - Type::getInt64Ty(Context)); + Type::getInt64Ty(Div->getContext())); NewIdxs[i-1] = ConstantExpr::getAdd(PrevIdx, Div); } else { diff --git a/lib/VMCore/ConstantFold.h b/lib/VMCore/ConstantFold.h index cc97001..d2dbbdd 100644 --- a/lib/VMCore/ConstantFold.h +++ b/lib/VMCore/ConstantFold.h @@ -23,46 +23,31 @@ namespace llvm { class Value; class Constant; class Type; - class LLVMContext; // Constant fold various types of instruction... Constant *ConstantFoldCastInstruction( - LLVMContext &Context, unsigned opcode, ///< The opcode of the cast Constant *V, ///< The source constant const Type *DestTy ///< The destination type ); - Constant *ConstantFoldSelectInstruction(LLVMContext &Context, - Constant *Cond, + Constant *ConstantFoldSelectInstruction(Constant *Cond, Constant *V1, Constant *V2); - Constant *ConstantFoldExtractElementInstruction(LLVMContext &Context, - Constant *Val, - Constant *Idx); - Constant *ConstantFoldInsertElementInstruction(LLVMContext &Context, - Constant *Val, - Constant *Elt, + Constant *ConstantFoldExtractElementInstruction(Constant *Val, Constant *Idx); + Constant *ConstantFoldInsertElementInstruction(Constant *Val, Constant *Elt, Constant *Idx); - Constant *ConstantFoldShuffleVectorInstruction(LLVMContext &Context, - Constant *V1, - Constant *V2, + Constant *ConstantFoldShuffleVectorInstruction(Constant *V1, Constant *V2, Constant *Mask); - Constant *ConstantFoldExtractValueInstruction(LLVMContext &Context, - Constant *Agg, + Constant *ConstantFoldExtractValueInstruction(Constant *Agg, const unsigned *Idxs, unsigned NumIdx); - Constant *ConstantFoldInsertValueInstruction(LLVMContext &Context, - Constant *Agg, - Constant *Val, + Constant *ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val, const unsigned *Idxs, unsigned NumIdx); - Constant *ConstantFoldBinaryInstruction(LLVMContext &Context, - unsigned Opcode, Constant *V1, + Constant *ConstantFoldBinaryInstruction(unsigned Opcode, Constant *V1, Constant *V2); - Constant *ConstantFoldCompareInstruction(LLVMContext &Context, - unsigned short predicate, + Constant *ConstantFoldCompareInstruction(unsigned short predicate, Constant *C1, Constant *C2); - Constant *ConstantFoldGetElementPtr(LLVMContext &Context, Constant *C, - bool inBounds, + Constant *ConstantFoldGetElementPtr(Constant *C, bool inBounds, Constant* const *Idxs, unsigned NumIdx); } // End llvm namespace diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp index 916aac6..98040ea 100644 --- a/lib/VMCore/Constants.cpp +++ b/lib/VMCore/Constants.cpp @@ -228,8 +228,7 @@ Constant::PossibleRelocationsTy Constant::getRelocationInfo() const { /// type, returns the elements of the vector in the specified smallvector. /// This handles breaking down a vector undef into undef elements, etc. For /// constant exprs and other cases we can't handle, we return an empty vector. 
-void Constant::getVectorElements(LLVMContext &Context, - SmallVectorImpl<Constant*> &Elts) const { +void Constant::getVectorElements(SmallVectorImpl<Constant*> &Elts) const { assert(isa<VectorType>(getType()) && "Not a vector constant!"); if (const ConstantVector *CV = dyn_cast<ConstantVector>(this)) { @@ -405,13 +404,13 @@ ConstantFP* ConstantFP::getNegativeZero(const Type* Ty) { Constant* ConstantFP::getZeroValueForNegation(const Type* Ty) { if (const VectorType *PTy = dyn_cast<VectorType>(Ty)) - if (PTy->getElementType()->isFloatingPoint()) { + if (PTy->getElementType()->isFloatingPointTy()) { std::vector<Constant*> zeros(PTy->getNumElements(), getNegativeZero(PTy->getElementType())); return ConstantVector::get(PTy, zeros); } - if (Ty->isFloatingPoint()) + if (Ty->isFloatingPointTy()) return getNegativeZero(Ty); return Constant::getNullValue(Ty); @@ -586,6 +585,27 @@ Constant* ConstantStruct::get(LLVMContext &Context, return get(Context, std::vector<Constant*>(Vals, Vals+NumVals), Packed); } +ConstantUnion::ConstantUnion(const UnionType *T, Constant* V) + : Constant(T, ConstantUnionVal, + OperandTraits<ConstantUnion>::op_end(this) - 1, 1) { + Use *OL = OperandList; + assert(T->getElementTypeIndex(V->getType()) >= 0 && + "Initializer for union element isn't a member of union type!"); + *OL = V; +} + +// ConstantUnion accessors. +Constant* ConstantUnion::get(const UnionType* T, Constant* V) { + LLVMContextImpl* pImpl = T->getContext().pImpl; + + // Create a ConstantAggregateZero value if all elements are zeros... + if (!V->isNullValue()) + return pImpl->UnionConstants.getOrCreate(T, V); + + return ConstantAggregateZero::get(T); +} + + ConstantVector::ConstantVector(const VectorType *T, const std::vector<Constant*> &V) : Constant(T, ConstantVectorVal, @@ -641,26 +661,47 @@ Constant* ConstantVector::get(Constant* const* Vals, unsigned NumVals) { } Constant* ConstantExpr::getNSWNeg(Constant* C) { - assert(C->getType()->isIntOrIntVector() && + assert(C->getType()->isIntOrIntVectorTy() && "Cannot NEG a nonintegral value!"); return getNSWSub(ConstantFP::getZeroValueForNegation(C->getType()), C); } +Constant* ConstantExpr::getNUWNeg(Constant* C) { + assert(C->getType()->isIntOrIntVectorTy() && + "Cannot NEG a nonintegral value!"); + return getNUWSub(ConstantFP::getZeroValueForNegation(C->getType()), C); +} + Constant* ConstantExpr::getNSWAdd(Constant* C1, Constant* C2) { return getTy(C1->getType(), Instruction::Add, C1, C2, OverflowingBinaryOperator::NoSignedWrap); } +Constant* ConstantExpr::getNUWAdd(Constant* C1, Constant* C2) { + return getTy(C1->getType(), Instruction::Add, C1, C2, + OverflowingBinaryOperator::NoUnsignedWrap); +} + Constant* ConstantExpr::getNSWSub(Constant* C1, Constant* C2) { return getTy(C1->getType(), Instruction::Sub, C1, C2, OverflowingBinaryOperator::NoSignedWrap); } +Constant* ConstantExpr::getNUWSub(Constant* C1, Constant* C2) { + return getTy(C1->getType(), Instruction::Sub, C1, C2, + OverflowingBinaryOperator::NoUnsignedWrap); +} + Constant* ConstantExpr::getNSWMul(Constant* C1, Constant* C2) { return getTy(C1->getType(), Instruction::Mul, C1, C2, OverflowingBinaryOperator::NoSignedWrap); } +Constant* ConstantExpr::getNUWMul(Constant* C1, Constant* C2) { + return getTy(C1->getType(), Instruction::Mul, C1, C2, + OverflowingBinaryOperator::NoUnsignedWrap); +} + Constant* ConstantExpr::getExactSDiv(Constant* C1, Constant* C2) { return getTy(C1->getType(), Instruction::SDiv, C1, C2, SDivOperator::IsExact); @@ -928,7 +969,7 @@ void ConstantArray::destroyConstant() { 
/// if the elements of the array are all ConstantInt's. bool ConstantArray::isString() const { // Check the element type for i8... - if (!getType()->getElementType()->isInteger(8)) + if (!getType()->getElementType()->isIntegerTy(8)) return false; // Check the elements to make sure they are all integers, not constant // expressions. @@ -943,7 +984,7 @@ bool ConstantArray::isString() const { /// null bytes except its terminator. bool ConstantArray::isCString() const { // Check the element type for i8... - if (!getType()->getElementType()->isInteger(8)) + if (!getType()->getElementType()->isIntegerTy(8)) return false; // Last element must be a null. @@ -990,6 +1031,13 @@ void ConstantStruct::destroyConstant() { // destroyConstant - Remove the constant from the constant table... // +void ConstantUnion::destroyConstant() { + getType()->getContext().pImpl->UnionConstants.remove(this); + destroyConstantImpl(); +} + +// destroyConstant - Remove the constant from the constant table... +// void ConstantVector::destroyConstant() { getType()->getContext().pImpl->VectorConstants.remove(this); destroyConstantImpl(); @@ -1134,7 +1182,7 @@ static inline Constant *getFoldedCast( Instruction::CastOps opc, Constant *C, const Type *Ty) { assert(Ty->isFirstClassType() && "Cannot cast to an aggregate type!"); // Fold a few common cases - if (Constant *FC = ConstantFoldCastInstruction(Ty->getContext(), opc, C, Ty)) + if (Constant *FC = ConstantFoldCastInstruction(opc, C, Ty)) return FC; LLVMContextImpl *pImpl = Ty->getContext().pImpl; @@ -1150,24 +1198,24 @@ Constant *ConstantExpr::getCast(unsigned oc, Constant *C, const Type *Ty) { Instruction::CastOps opc = Instruction::CastOps(oc); assert(Instruction::isCast(opc) && "opcode out of range"); assert(C && Ty && "Null arguments to getCast"); - assert(Ty->isFirstClassType() && "Cannot cast to an aggregate type!"); + assert(CastInst::castIsValid(opc, C, Ty) && "Invalid constantexpr cast!"); switch (opc) { - default: - llvm_unreachable("Invalid cast opcode"); - break; - case Instruction::Trunc: return getTrunc(C, Ty); - case Instruction::ZExt: return getZExt(C, Ty); - case Instruction::SExt: return getSExt(C, Ty); - case Instruction::FPTrunc: return getFPTrunc(C, Ty); - case Instruction::FPExt: return getFPExtend(C, Ty); - case Instruction::UIToFP: return getUIToFP(C, Ty); - case Instruction::SIToFP: return getSIToFP(C, Ty); - case Instruction::FPToUI: return getFPToUI(C, Ty); - case Instruction::FPToSI: return getFPToSI(C, Ty); - case Instruction::PtrToInt: return getPtrToInt(C, Ty); - case Instruction::IntToPtr: return getIntToPtr(C, Ty); - case Instruction::BitCast: return getBitCast(C, Ty); + default: + llvm_unreachable("Invalid cast opcode"); + break; + case Instruction::Trunc: return getTrunc(C, Ty); + case Instruction::ZExt: return getZExt(C, Ty); + case Instruction::SExt: return getSExt(C, Ty); + case Instruction::FPTrunc: return getFPTrunc(C, Ty); + case Instruction::FPExt: return getFPExtend(C, Ty); + case Instruction::UIToFP: return getUIToFP(C, Ty); + case Instruction::SIToFP: return getSIToFP(C, Ty); + case Instruction::FPToUI: return getFPToUI(C, Ty); + case Instruction::FPToSI: return getFPToSI(C, Ty); + case Instruction::PtrToInt: return getPtrToInt(C, Ty); + case Instruction::IntToPtr: return getIntToPtr(C, Ty); + case Instruction::BitCast: return getBitCast(C, Ty); } return 0; } @@ -1192,17 +1240,17 @@ Constant *ConstantExpr::getTruncOrBitCast(Constant *C, const Type *Ty) { Constant *ConstantExpr::getPointerCast(Constant *S, const Type *Ty) { 
assert(isa<PointerType>(S->getType()) && "Invalid cast"); - assert((Ty->isInteger() || isa<PointerType>(Ty)) && "Invalid cast"); + assert((Ty->isIntegerTy() || isa<PointerType>(Ty)) && "Invalid cast"); - if (Ty->isInteger()) + if (Ty->isIntegerTy()) return getCast(Instruction::PtrToInt, S, Ty); return getCast(Instruction::BitCast, S, Ty); } Constant *ConstantExpr::getIntegerCast(Constant *C, const Type *Ty, bool isSigned) { - assert(C->getType()->isIntOrIntVector() && - Ty->isIntOrIntVector() && "Invalid cast"); + assert(C->getType()->isIntOrIntVectorTy() && + Ty->isIntOrIntVectorTy() && "Invalid cast"); unsigned SrcBits = C->getType()->getScalarSizeInBits(); unsigned DstBits = Ty->getScalarSizeInBits(); Instruction::CastOps opcode = @@ -1213,7 +1261,7 @@ } Constant *ConstantExpr::getFPCast(Constant *C, const Type *Ty) { - assert(C->getType()->isFPOrFPVector() && Ty->isFPOrFPVector() && + assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() && "Invalid cast"); unsigned SrcBits = C->getType()->getScalarSizeInBits(); unsigned DstBits = Ty->getScalarSizeInBits(); @@ -1230,8 +1278,8 @@ Constant *ConstantExpr::getTrunc(Constant *C, const Type *Ty) { bool toVec = Ty->getTypeID() == Type::VectorTyID; #endif assert((fromVec == toVec) && "Cannot convert from scalar to/from vector"); - assert(C->getType()->isIntOrIntVector() && "Trunc operand must be integer"); - assert(Ty->isIntOrIntVector() && "Trunc produces only integral"); + assert(C->getType()->isIntOrIntVectorTy() && "Trunc operand must be integer"); + assert(Ty->isIntOrIntVectorTy() && "Trunc produces only integral"); assert(C->getType()->getScalarSizeInBits() > Ty->getScalarSizeInBits()&& "SrcTy must be larger than DestTy for Trunc!"); @@ -1244,8 +1292,8 @@ Constant *ConstantExpr::getSExt(Constant *C, const Type *Ty) { bool toVec = Ty->getTypeID() == Type::VectorTyID; #endif assert((fromVec == toVec) && "Cannot convert from scalar to/from vector"); - assert(C->getType()->isIntOrIntVector() && "SExt operand must be integral"); - assert(Ty->isIntOrIntVector() && "SExt produces only integer"); + assert(C->getType()->isIntOrIntVectorTy() && "SExt operand must be integral"); + assert(Ty->isIntOrIntVectorTy() && "SExt produces only integer"); assert(C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits()&& "SrcTy must be smaller than DestTy for SExt!"); @@ -1258,8 +1306,8 @@ Constant *ConstantExpr::getZExt(Constant *C, const Type *Ty) { bool toVec = Ty->getTypeID() == Type::VectorTyID; #endif assert((fromVec == toVec) && "Cannot convert from scalar to/from vector"); - assert(C->getType()->isIntOrIntVector() && "ZEXt operand must be integral"); - assert(Ty->isIntOrIntVector() && "ZExt produces only integer"); + assert(C->getType()->isIntOrIntVectorTy() && "ZExt operand must be integral"); + assert(Ty->isIntOrIntVectorTy() && "ZExt produces only integer"); assert(C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits()&& "SrcTy must be smaller than DestTy for ZExt!"); @@ -1272,7 +1320,7 @@ Constant *ConstantExpr::getFPTrunc(Constant *C, const Type *Ty) { bool toVec = Ty->getTypeID() == Type::VectorTyID; #endif assert((fromVec == toVec) && "Cannot convert from scalar to/from vector"); - assert(C->getType()->isFPOrFPVector() && Ty->isFPOrFPVector() && + assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() && C->getType()->getScalarSizeInBits() > Ty->getScalarSizeInBits()&& "This is an illegal floating point truncation!"); return 
getFoldedCast(Instruction::FPTrunc, C, Ty); @@ -1284,7 +1332,7 @@ Constant *ConstantExpr::getFPExtend(Constant *C, const Type *Ty) { bool toVec = Ty->getTypeID() == Type::VectorTyID; #endif assert((fromVec == toVec) && "Cannot convert from scalar to/from vector"); - assert(C->getType()->isFPOrFPVector() && Ty->isFPOrFPVector() && + assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() && C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits()&& "This is an illegal floating point extension!"); return getFoldedCast(Instruction::FPExt, C, Ty); @@ -1296,7 +1344,7 @@ Constant *ConstantExpr::getUIToFP(Constant *C, const Type *Ty) { bool toVec = Ty->getTypeID() == Type::VectorTyID; #endif assert((fromVec == toVec) && "Cannot convert from scalar to/from vector"); - assert(C->getType()->isIntOrIntVector() && Ty->isFPOrFPVector() && + assert(C->getType()->isIntOrIntVectorTy() && Ty->isFPOrFPVectorTy() && "This is an illegal uint to floating point cast!"); return getFoldedCast(Instruction::UIToFP, C, Ty); } @@ -1307,7 +1355,7 @@ Constant *ConstantExpr::getSIToFP(Constant *C, const Type *Ty) { bool toVec = Ty->getTypeID() == Type::VectorTyID; #endif assert((fromVec == toVec) && "Cannot convert from scalar to/from vector"); - assert(C->getType()->isIntOrIntVector() && Ty->isFPOrFPVector() && + assert(C->getType()->isIntOrIntVectorTy() && Ty->isFPOrFPVectorTy() && "This is an illegal sint to floating point cast!"); return getFoldedCast(Instruction::SIToFP, C, Ty); } @@ -1318,7 +1366,7 @@ Constant *ConstantExpr::getFPToUI(Constant *C, const Type *Ty) { bool toVec = Ty->getTypeID() == Type::VectorTyID; #endif assert((fromVec == toVec) && "Cannot convert from scalar to/from vector"); - assert(C->getType()->isFPOrFPVector() && Ty->isIntOrIntVector() && + assert(C->getType()->isFPOrFPVectorTy() && Ty->isIntOrIntVectorTy() && "This is an illegal floating point to uint cast!"); return getFoldedCast(Instruction::FPToUI, C, Ty); } @@ -1329,38 +1377,26 @@ Constant *ConstantExpr::getFPToSI(Constant *C, const Type *Ty) { bool toVec = Ty->getTypeID() == Type::VectorTyID; #endif assert((fromVec == toVec) && "Cannot convert from scalar to/from vector"); - assert(C->getType()->isFPOrFPVector() && Ty->isIntOrIntVector() && + assert(C->getType()->isFPOrFPVectorTy() && Ty->isIntOrIntVectorTy() && "This is an illegal floating point to sint cast!"); return getFoldedCast(Instruction::FPToSI, C, Ty); } Constant *ConstantExpr::getPtrToInt(Constant *C, const Type *DstTy) { assert(isa<PointerType>(C->getType()) && "PtrToInt source must be pointer"); - assert(DstTy->isInteger() && "PtrToInt destination must be integral"); + assert(DstTy->isIntegerTy() && "PtrToInt destination must be integral"); return getFoldedCast(Instruction::PtrToInt, C, DstTy); } Constant *ConstantExpr::getIntToPtr(Constant *C, const Type *DstTy) { - assert(C->getType()->isInteger() && "IntToPtr source must be integral"); + assert(C->getType()->isIntegerTy() && "IntToPtr source must be integral"); assert(isa<PointerType>(DstTy) && "IntToPtr destination must be a pointer"); return getFoldedCast(Instruction::IntToPtr, C, DstTy); } Constant *ConstantExpr::getBitCast(Constant *C, const Type *DstTy) { - // BitCast implies a no-op cast of type only. No bits change. However, you - // can't cast pointers to anything but pointers. 
-#ifndef NDEBUG - const Type *SrcTy = C->getType(); - assert((isa<PointerType>(SrcTy) == isa<PointerType>(DstTy)) && - "BitCast cannot cast pointer to non-pointer and vice versa"); - - // Now we know we're not dealing with mismatched pointer casts (ptr->nonptr - // or nonptr->ptr). For all the other types, the cast is okay if source and - // destination bit widths are identical. - unsigned SrcBitSize = SrcTy->getPrimitiveSizeInBits(); - unsigned DstBitSize = DstTy->getPrimitiveSizeInBits(); -#endif - assert(SrcBitSize == DstBitSize && "BitCast requires types of same width"); + assert(CastInst::castIsValid(Instruction::BitCast, C, DstTy) && + "Invalid constantexpr bitcast!"); // It is common to ask for a bitcast of a value to its own type, handle this // speedily. @@ -1380,8 +1416,7 @@ Constant *ConstantExpr::getTy(const Type *ReqTy, unsigned Opcode, "Operand types in binary constant expression should match"); if (ReqTy == C1->getType() || ReqTy == Type::getInt1Ty(ReqTy->getContext())) - if (Constant *FC = ConstantFoldBinaryInstruction(ReqTy->getContext(), - Opcode, C1, C2)) + if (Constant *FC = ConstantFoldBinaryInstruction(Opcode, C1, C2)) return FC; // Fold a few common cases... std::vector<Constant*> argVec(1, C1); argVec.push_back(C2); @@ -1414,7 +1449,7 @@ Constant *ConstantExpr::getCompareTy(unsigned short predicate, Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2, unsigned Flags) { // API compatibility: Adjust integer opcodes to floating-point opcodes. - if (C1->getType()->isFPOrFPVector()) { + if (C1->getType()->isFPOrFPVectorTy()) { if (Opcode == Instruction::Add) Opcode = Instruction::FAdd; else if (Opcode == Instruction::Sub) Opcode = Instruction::FSub; else if (Opcode == Instruction::Mul) Opcode = Instruction::FMul; @@ -1425,51 +1460,51 @@ Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2, case Instruction::Sub: case Instruction::Mul: assert(C1->getType() == C2->getType() && "Op types should be identical!"); - assert(C1->getType()->isIntOrIntVector() && + assert(C1->getType()->isIntOrIntVectorTy() && "Tried to create an integer operation on a non-integer type!"); break; case Instruction::FAdd: case Instruction::FSub: case Instruction::FMul: assert(C1->getType() == C2->getType() && "Op types should be identical!"); - assert(C1->getType()->isFPOrFPVector() && + assert(C1->getType()->isFPOrFPVectorTy() && "Tried to create a floating-point operation on a " "non-floating-point type!"); break; case Instruction::UDiv: case Instruction::SDiv: assert(C1->getType() == C2->getType() && "Op types should be identical!"); - assert(C1->getType()->isIntOrIntVector() && + assert(C1->getType()->isIntOrIntVectorTy() && "Tried to create an arithmetic operation on a non-arithmetic type!"); break; case Instruction::FDiv: assert(C1->getType() == C2->getType() && "Op types should be identical!"); - assert(C1->getType()->isFPOrFPVector() && + assert(C1->getType()->isFPOrFPVectorTy() && "Tried to create an arithmetic operation on a non-arithmetic type!"); break; case Instruction::URem: case Instruction::SRem: assert(C1->getType() == C2->getType() && "Op types should be identical!"); - assert(C1->getType()->isIntOrIntVector() && + assert(C1->getType()->isIntOrIntVectorTy() && "Tried to create an arithmetic operation on a non-arithmetic type!"); break; case Instruction::FRem: assert(C1->getType() == C2->getType() && "Op types should be identical!"); - assert(C1->getType()->isFPOrFPVector() && + assert(C1->getType()->isFPOrFPVectorTy() && "Tried to create an 
arithmetic operation on a non-arithmetic type!"); break; case Instruction::And: case Instruction::Or: case Instruction::Xor: assert(C1->getType() == C2->getType() && "Op types should be identical!"); - assert(C1->getType()->isIntOrIntVector() && + assert(C1->getType()->isIntOrIntVectorTy() && "Tried to create a logical operation on a non-integral type!"); break; case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: assert(C1->getType() == C2->getType() && "Op types should be identical!"); - assert(C1->getType()->isIntOrIntVector() && + assert(C1->getType()->isIntOrIntVectorTy() && "Tried to create a shift operation on a non-integer type!"); break; default: @@ -1491,30 +1526,35 @@ Constant* ConstantExpr::getSizeOf(const Type* Ty) { } Constant* ConstantExpr::getAlignOf(const Type* Ty) { - // alignof is implemented as: (i64) gep ({i8,Ty}*)null, 0, 1 + // alignof is implemented as: (i64) gep ({i1,Ty}*)null, 0, 1 // Note that a non-inbounds gep is used, as null isn't within any object. const Type *AligningTy = StructType::get(Ty->getContext(), - Type::getInt8Ty(Ty->getContext()), Ty, NULL); + Type::getInt1Ty(Ty->getContext()), Ty, NULL); Constant *NullPtr = Constant::getNullValue(AligningTy->getPointerTo()); - Constant *Zero = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 0); + Constant *Zero = ConstantInt::get(Type::getInt64Ty(Ty->getContext()), 0); Constant *One = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 1); Constant *Indices[2] = { Zero, One }; Constant *GEP = getGetElementPtr(NullPtr, Indices, 2); return getCast(Instruction::PtrToInt, GEP, - Type::getInt32Ty(Ty->getContext())); + Type::getInt64Ty(Ty->getContext())); } Constant* ConstantExpr::getOffsetOf(const StructType* STy, unsigned FieldNo) { + return getOffsetOf(STy, ConstantInt::get(Type::getInt32Ty(STy->getContext()), + FieldNo)); +} + +Constant* ConstantExpr::getOffsetOf(const Type* Ty, Constant *FieldNo) { // offsetof is implemented as: (i64) gep (Ty*)null, 0, FieldNo // Note that a non-inbounds gep is used, as null isn't within any object. Constant *GEPIdx[] = { - ConstantInt::get(Type::getInt64Ty(STy->getContext()), 0), - ConstantInt::get(Type::getInt32Ty(STy->getContext()), FieldNo) + ConstantInt::get(Type::getInt64Ty(Ty->getContext()), 0), + FieldNo }; Constant *GEP = getGetElementPtr( - Constant::getNullValue(PointerType::getUnqual(STy)), GEPIdx, 2); + Constant::getNullValue(PointerType::getUnqual(Ty)), GEPIdx, 2); return getCast(Instruction::PtrToInt, GEP, - Type::getInt64Ty(STy->getContext())); + Type::getInt64Ty(Ty->getContext())); } Constant *ConstantExpr::getCompare(unsigned short pred, @@ -1528,8 +1568,7 @@ Constant *ConstantExpr::getSelectTy(const Type *ReqTy, Constant *C, assert(!SelectInst::areInvalidOperands(C, V1, V2)&&"Invalid select operands"); if (ReqTy == V1->getType()) - if (Constant *SC = ConstantFoldSelectInstruction( - ReqTy->getContext(), C, V1, V2)) + if (Constant *SC = ConstantFoldSelectInstruction(C, V1, V2)) return SC; // Fold common cases std::vector<Constant*> argVec(3, C); @@ -1549,9 +1588,8 @@ Constant *ConstantExpr::getGetElementPtrTy(const Type *ReqTy, Constant *C, cast<PointerType>(ReqTy)->getElementType() && "GEP indices invalid!"); - if (Constant *FC = ConstantFoldGetElementPtr( - ReqTy->getContext(), C, /*inBounds=*/false, - (Constant**)Idxs, NumIdx)) + if (Constant *FC = ConstantFoldGetElementPtr(C, /*inBounds=*/false, + (Constant**)Idxs, NumIdx)) return FC; // Fold a few common cases... 
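The getAlignOf change above is worth unpacking: alignment is computed purely as a constant expression, with no TargetData involved. In a struct {i1, Ty}, the second field is laid out at the smallest offset past the one-byte i1 that satisfies Ty's alignment, which is exactly alignof(Ty); a gep to field 1 off a null pointer, fed through ptrtoint, therefore yields the alignment. (A non-inbounds gep is required since null lies inside no object, as the comment notes.) Restated as a free-standing sketch; the function name is illustrative:

    #include "llvm/Constants.h"
    #include "llvm/DerivedTypes.h"
    using namespace llvm;

    Constant *alignOfExpr(const Type *Ty) {
      LLVMContext &Ctx = Ty->getContext();
      // { i1, Ty }: field 1 lands at offset alignof(Ty).
      const StructType *Aligner =
          StructType::get(Ctx, Type::getInt1Ty(Ctx), Ty, NULL);
      Constant *NullPtr = Constant::getNullValue(Aligner->getPointerTo());
      Constant *Indices[2] = {
        ConstantInt::get(Type::getInt64Ty(Ctx), 0),  // step over the pointer
        ConstantInt::get(Type::getInt32Ty(Ctx), 1)   // select field 1
      };
      Constant *GEP = ConstantExpr::getGetElementPtr(NullPtr, Indices, 2);
      return ConstantExpr::getPtrToInt(GEP, Type::getInt64Ty(Ctx));
    }

The new Type-taking getOffsetOf overload generalizes the same null-gep trick to an arbitrary, even non-constant-folded, field number.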
assert(isa<PointerType>(C->getType()) && @@ -1577,9 +1615,8 @@ Constant *ConstantExpr::getInBoundsGetElementPtrTy(const Type *ReqTy, cast<PointerType>(ReqTy)->getElementType() && "GEP indices invalid!"); - if (Constant *FC = ConstantFoldGetElementPtr( - ReqTy->getContext(), C, /*inBounds=*/true, - (Constant**)Idxs, NumIdx)) + if (Constant *FC = ConstantFoldGetElementPtr(C, /*inBounds=*/true, + (Constant**)Idxs, NumIdx)) return FC; // Fold a few common cases... assert(isa<PointerType>(C->getType()) && @@ -1635,8 +1672,7 @@ ConstantExpr::getICmp(unsigned short pred, Constant *LHS, Constant *RHS) { assert(pred >= ICmpInst::FIRST_ICMP_PREDICATE && pred <= ICmpInst::LAST_ICMP_PREDICATE && "Invalid ICmp Predicate"); - if (Constant *FC = ConstantFoldCompareInstruction( - LHS->getContext(), pred, LHS, RHS)) + if (Constant *FC = ConstantFoldCompareInstruction(pred, LHS, RHS)) return FC; // Fold a few common cases... // Look up the constant in the table first to ensure uniqueness @@ -1659,8 +1695,7 @@ ConstantExpr::getFCmp(unsigned short pred, Constant *LHS, Constant *RHS) { assert(LHS->getType() == RHS->getType()); assert(pred <= FCmpInst::LAST_FCMP_PREDICATE && "Invalid FCmp Predicate"); - if (Constant *FC = ConstantFoldCompareInstruction( - LHS->getContext(), pred, LHS, RHS)) + if (Constant *FC = ConstantFoldCompareInstruction(pred, LHS, RHS)) return FC; // Fold a few common cases... // Look up the constant in the table first to ensure uniqueness @@ -1680,8 +1715,7 @@ ConstantExpr::getFCmp(unsigned short pred, Constant *LHS, Constant *RHS) { Constant *ConstantExpr::getExtractElementTy(const Type *ReqTy, Constant *Val, Constant *Idx) { - if (Constant *FC = ConstantFoldExtractElementInstruction( - ReqTy->getContext(), Val, Idx)) + if (Constant *FC = ConstantFoldExtractElementInstruction(Val, Idx)) return FC; // Fold a few common cases. // Look up the constant in the table first to ensure uniqueness std::vector<Constant*> ArgVec(1, Val); @@ -1695,7 +1729,7 @@ Constant *ConstantExpr::getExtractElementTy(const Type *ReqTy, Constant *Val, Constant *ConstantExpr::getExtractElement(Constant *Val, Constant *Idx) { assert(isa<VectorType>(Val->getType()) && "Tried to create extractelement operation on non-vector type!"); - assert(Idx->getType()->isInteger(32) && + assert(Idx->getType()->isIntegerTy(32) && "Extractelement index must be i32 type!"); return getExtractElementTy(cast<VectorType>(Val->getType())->getElementType(), Val, Idx); @@ -1703,8 +1737,7 @@ Constant *ConstantExpr::getExtractElement(Constant *Val, Constant *Idx) { Constant *ConstantExpr::getInsertElementTy(const Type *ReqTy, Constant *Val, Constant *Elt, Constant *Idx) { - if (Constant *FC = ConstantFoldInsertElementInstruction( - ReqTy->getContext(), Val, Elt, Idx)) + if (Constant *FC = ConstantFoldInsertElementInstruction(Val, Elt, Idx)) return FC; // Fold a few common cases. 
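A mechanical change threads through this whole region: the ConstantFold*Instruction helpers (cast, select, GEP, compare, extractelement, insertelement, shufflevector) no longer take an LLVMContext argument and recover the context from their operands instead. The public entry points keep their shape, so callers are unaffected; a small illustration of the compare path, with an illustrative helper name:

    #include "llvm/Constants.h"
    #include "llvm/Instructions.h"
    using namespace llvm;

    // getICmp first tries ConstantFoldCompareInstruction(pred, LHS, RHS) --
    // note: no context parameter anymore -- and only builds a uniqued
    // ConstantExpr if folding fails.
    Constant *constEq(Constant *LHS, Constant *RHS) {
      return ConstantExpr::getICmp(ICmpInst::ICMP_EQ, LHS, RHS);
    }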
// Look up the constant in the table first to ensure uniqueness std::vector<Constant*> ArgVec(1, Val); @@ -1722,15 +1755,14 @@ Constant *ConstantExpr::getInsertElement(Constant *Val, Constant *Elt, "Tried to create insertelement operation on non-vector type!"); assert(Elt->getType() == cast<VectorType>(Val->getType())->getElementType() && "Insertelement types must match!"); - assert(Idx->getType()->isInteger(32) && + assert(Idx->getType()->isIntegerTy(32) && "Insertelement index must be i32 type!"); return getInsertElementTy(Val->getType(), Val, Elt, Idx); } Constant *ConstantExpr::getShuffleVectorTy(const Type *ReqTy, Constant *V1, Constant *V2, Constant *Mask) { - if (Constant *FC = ConstantFoldShuffleVectorInstruction( - ReqTy->getContext(), V1, V2, Mask)) + if (Constant *FC = ConstantFoldShuffleVectorInstruction(V1, V2, Mask)) return FC; // Fold a few common cases... // Look up the constant in the table first to ensure uniqueness std::vector<Constant*> ArgVec(1, V1); @@ -1763,8 +1795,7 @@ Constant *ConstantExpr::getInsertValueTy(const Type *ReqTy, Constant *Agg, "insertvalue type invalid!"); assert(Agg->getType()->isFirstClassType() && "Non-first-class type for constant InsertValue expression"); - Constant *FC = ConstantFoldInsertValueInstruction( - ReqTy->getContext(), Agg, Val, Idxs, NumIdx); + Constant *FC = ConstantFoldInsertValueInstruction(Agg, Val, Idxs, NumIdx); assert(FC && "InsertValue constant expr couldn't be folded!"); return FC; } @@ -1790,8 +1821,7 @@ Constant *ConstantExpr::getExtractValueTy(const Type *ReqTy, Constant *Agg, "extractvalue indices invalid!"); assert(Agg->getType()->isFirstClassType() && "Non-first-class type for constant extractvalue expression"); - Constant *FC = ConstantFoldExtractValueInstruction( - ReqTy->getContext(), Agg, Idxs, NumIdx); + Constant *FC = ConstantFoldExtractValueInstruction(Agg, Idxs, NumIdx); assert(FC && "ExtractValue constant expr couldn't be folded!"); return FC; } @@ -1809,9 +1839,9 @@ Constant *ConstantExpr::getExtractValue(Constant *Agg, Constant* ConstantExpr::getNeg(Constant* C) { // API compatibility: Adjust integer opcodes to floating-point opcodes. 
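getNeg below keeps old callers working by rerouting floating-point operands to getFNeg. Both paths share the shape zero - C, but the zero differs: ConstantFP::getZeroValueForNegation hands back -0.0 for FP types (and vectors of them), the only value whose subtraction negates every float including +-0.0, and a plain null value otherwise. A sketch of that shared shape under the renamed predicates, mirroring what the two getters expand to (helper name illustrative):

    #include "llvm/Constants.h"
    #include "llvm/Instruction.h"
    using namespace llvm;

    Constant *negByHand(Constant *C) {
      Constant *Zero = ConstantFP::getZeroValueForNegation(C->getType());
      unsigned Opc = C->getType()->isFPOrFPVectorTy() ? Instruction::FSub
                                                      : Instruction::Sub;
      return ConstantExpr::get(Opc, Zero, C);  // what getNeg/getFNeg build
    }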
- if (C->getType()->isFPOrFPVector()) + if (C->getType()->isFPOrFPVectorTy()) return getFNeg(C); - assert(C->getType()->isIntOrIntVector() && + assert(C->getType()->isIntOrIntVectorTy() && "Cannot NEG a nonintegral value!"); return get(Instruction::Sub, ConstantFP::getZeroValueForNegation(C->getType()), @@ -1819,7 +1849,7 @@ Constant* ConstantExpr::getNeg(Constant* C) { } Constant* ConstantExpr::getFNeg(Constant* C) { - assert(C->getType()->isFPOrFPVector() && + assert(C->getType()->isFPOrFPVectorTy() && "Cannot FNEG a non-floating-point value!"); return get(Instruction::FSub, ConstantFP::getZeroValueForNegation(C->getType()), @@ -1827,7 +1857,7 @@ Constant* ConstantExpr::getFNeg(Constant* C) { } Constant* ConstantExpr::getNot(Constant* C) { - assert(C->getType()->isIntOrIntVector() && + assert(C->getType()->isIntOrIntVectorTy() && "Cannot NOT a nonintegral value!"); return get(Instruction::Xor, C, Constant::getAllOnesValue(C->getType())); } @@ -2081,6 +2111,11 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To, destroyConstant(); } +void ConstantUnion::replaceUsesOfWithOnConstant(Value *From, Value *To, + Use *U) { + assert(false && "Implement replaceUsesOfWithOnConstant for unions"); +} + void ConstantVector::replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U) { assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!"); diff --git a/lib/VMCore/ConstantsContext.h b/lib/VMCore/ConstantsContext.h index 08224e4..c798ba2 100644 --- a/lib/VMCore/ConstantsContext.h +++ b/lib/VMCore/ConstantsContext.h @@ -341,6 +341,13 @@ struct ConstantTraits< std::vector<T, Alloc> > { } }; +template<> +struct ConstantTraits<Constant *> { + static unsigned uses(Constant * const & v) { + return 1; + } +}; + template<class ConstantClass, class TypeClass, class ValType> struct ConstantCreator { static ConstantClass *create(const TypeClass *Ty, const ValType &V) { @@ -470,6 +477,14 @@ struct ConstantKeyData<ConstantStruct> { } }; +template<> +struct ConstantKeyData<ConstantUnion> { + typedef Constant* ValType; + static ValType getValType(ConstantUnion *CU) { + return cast<Constant>(CU->getOperand(0)); + } +}; + // ConstantPointerNull does not take extra "value" argument... template<class ValType> struct ConstantCreator<ConstantPointerNull, PointerType, ValType> { diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp index 984d245..a044fc5 100644 --- a/lib/VMCore/Core.cpp +++ b/lib/VMCore/Core.cpp @@ -20,12 +20,13 @@ #include "llvm/GlobalAlias.h" #include "llvm/LLVMContext.h" #include "llvm/TypeSymbolTable.h" -#include "llvm/ModuleProvider.h" #include "llvm/InlineAsm.h" #include "llvm/IntrinsicInst.h" -#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/CallSite.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" #include <cassert> #include <cstdlib> #include <cstring> @@ -140,6 +141,8 @@ LLVMTypeKind LLVMGetTypeKind(LLVMTypeRef Ty) { return LLVMFunctionTypeKind; case Type::StructTyID: return LLVMStructTypeKind; + case Type::UnionTyID: + return LLVMUnionTypeKind; case Type::ArrayTyID: return LLVMArrayTypeKind; case Type::PointerTyID: @@ -298,6 +301,35 @@ LLVMBool LLVMIsPackedStruct(LLVMTypeRef StructTy) { return unwrap<StructType>(StructTy)->isPacked(); } +/*--.. 
Operations on union types ..........................................--*/ + +LLVMTypeRef LLVMUnionTypeInContext(LLVMContextRef C, LLVMTypeRef *ElementTypes, + unsigned ElementCount) { + SmallVector<const Type*, 8> Tys; + for (LLVMTypeRef *I = ElementTypes, + *E = ElementTypes + ElementCount; I != E; ++I) + Tys.push_back(unwrap(*I)); + + return wrap(UnionType::get(&Tys[0], Tys.size())); +} + +LLVMTypeRef LLVMUnionType(LLVMTypeRef *ElementTypes, + unsigned ElementCount, int Packed) { + return LLVMUnionTypeInContext(LLVMGetGlobalContext(), ElementTypes, + ElementCount); +} + +unsigned LLVMCountUnionElementTypes(LLVMTypeRef UnionTy) { + return unwrap<UnionType>(UnionTy)->getNumElements(); +} + +void LLVMGetUnionElementTypes(LLVMTypeRef UnionTy, LLVMTypeRef *Dest) { + UnionType *Ty = unwrap<UnionType>(UnionTy); + for (FunctionType::param_iterator I = Ty->element_begin(), + E = Ty->element_end(); I != E; ++I) + *Dest++ = wrap(*I); +} + /*--.. Operations on array, pointer, and vector types (sequence types) .....--*/ LLVMTypeRef LLVMArrayType(LLVMTypeRef ElementType, unsigned ElementCount) { @@ -932,8 +964,6 @@ LLVMLinkage LLVMGetLinkage(LLVMValueRef Global) { return LLVMDLLExportLinkage; case GlobalValue::ExternalWeakLinkage: return LLVMExternalWeakLinkage; - case GlobalValue::GhostLinkage: - return LLVMGhostLinkage; case GlobalValue::CommonLinkage: return LLVMCommonLinkage; } @@ -988,7 +1018,8 @@ void LLVMSetLinkage(LLVMValueRef Global, LLVMLinkage Linkage) { GV->setLinkage(GlobalValue::ExternalWeakLinkage); break; case LLVMGhostLinkage: - GV->setLinkage(GlobalValue::GhostLinkage); + DEBUG(errs() + << "LLVMSetLinkage(): LLVMGhostLinkage is no longer supported."); break; case LLVMCommonLinkage: GV->setLinkage(GlobalValue::CommonLinkage); @@ -1965,7 +1996,7 @@ LLVMValueRef LLVMBuildPtrDiff(LLVMBuilderRef B, LLVMValueRef LHS, LLVMModuleProviderRef LLVMCreateModuleProviderForExistingModule(LLVMModuleRef M) { - return wrap(new ExistingModuleProvider(unwrap(M))); + return reinterpret_cast<LLVMModuleProviderRef>(M); } void LLVMDisposeModuleProvider(LLVMModuleProviderRef MP) { diff --git a/lib/VMCore/GVMaterializer.cpp b/lib/VMCore/GVMaterializer.cpp new file mode 100644 index 0000000..f77a9c9 --- /dev/null +++ b/lib/VMCore/GVMaterializer.cpp @@ -0,0 +1,18 @@ +//===-- GVMaterializer.cpp - Base implementation for GV materializers -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Minimal implementation of the abstract interface for materializing +// GlobalValues. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/GVMaterializer.h" +using namespace llvm; + +GVMaterializer::~GVMaterializer() {} diff --git a/lib/VMCore/Globals.cpp b/lib/VMCore/Globals.cpp index 94bf3de..489ec65 100644 --- a/lib/VMCore/Globals.cpp +++ b/lib/VMCore/Globals.cpp @@ -43,6 +43,19 @@ static bool removeDeadUsersOfConstant(const Constant *C) { return true; } +bool GlobalValue::isMaterializable() const { + return getParent() && getParent()->isMaterializable(this); +} +bool GlobalValue::isDematerializable() const { + return getParent() && getParent()->isDematerializable(this); +} +bool GlobalValue::Materialize(std::string *ErrInfo) { + return getParent()->Materialize(this, ErrInfo); +} +void GlobalValue::Dematerialize() { + getParent()->Dematerialize(this); +} + /// removeDeadConstantUsers - If there are any dead constant users dangling /// off of this global value, remove them. This method is useful for clients /// that want to check to see if a global is unused, but don't want to deal diff --git a/lib/VMCore/IRBuilder.cpp b/lib/VMCore/IRBuilder.cpp index 699bf0f..9f2786e 100644 --- a/lib/VMCore/IRBuilder.cpp +++ b/lib/VMCore/IRBuilder.cpp @@ -19,7 +19,7 @@ using namespace llvm; /// CreateGlobalString - Make a new global variable with an initializer that -/// has array of i8 type filled in the the nul terminated string value +/// has array of i8 type filled in with the nul terminated string value /// specified. If Name is specified, it is the name of the global variable /// created. Value *IRBuilderBase::CreateGlobalString(const char *Str, const Twine &Name) { diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp index e2b920e..9d5f7a5 100644 --- a/lib/VMCore/Instructions.cpp +++ b/lib/VMCore/Instructions.cpp @@ -787,7 +787,7 @@ BasicBlock *UnreachableInst::getSuccessorV(unsigned idx) const { void BranchInst::AssertOK() { if (isConditional()) - assert(getCondition()->getType()->isInteger(1) && + assert(getCondition()->getType()->isIntegerTy(1) && "May only branch on boolean predicates!"); } @@ -892,7 +892,7 @@ static Value *getAISize(LLVMContext &Context, Value *Amt) { else { assert(!isa<BasicBlock>(Amt) && "Passed basic block into allocation size parameter! Use other ctor"); - assert(Amt->getType()->isInteger(32) && + assert(Amt->getType()->isIntegerTy(32) && "Allocation array size is not a 32-bit integer!"); } return Amt; @@ -1391,7 +1391,7 @@ ExtractElementInst::ExtractElementInst(Value *Val, Value *Index, bool ExtractElementInst::isValidOperands(const Value *Val, const Value *Index) { - if (!isa<VectorType>(Val->getType()) || !Index->getType()->isInteger(32)) + if (!isa<VectorType>(Val->getType()) || !Index->getType()->isIntegerTy(32)) return false; return true; } @@ -1438,7 +1438,7 @@ bool InsertElementInst::isValidOperands(const Value *Vec, const Value *Elt, if (Elt->getType() != cast<VectorType>(Vec->getType())->getElementType()) return false;// Second operand of insertelement must be vector element type. - if (!Index->getType()->isInteger(32)) + if (!Index->getType()->isIntegerTy(32)) return false; // Third operand of insertelement must be i32. 
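The Globals.cpp hunk above is the GlobalValue-facing half of the ModuleProvider removal visible later in this diff: lazy deserialization now lives behind a GVMaterializer installed on the Module, and GlobalValue merely forwards to it. Callers that previously went through ModuleProvider::materializeFunction can be ported roughly like this (the helper name is illustrative; error handling trimmed):

    #include "llvm/Function.h"
    #include <string>
    using namespace llvm;

    // Ensure F's body has been read in before inspecting it.
    bool ensureBody(Function &F, std::string &Err) {
      if (!F.isMaterializable())
        return true;                // nothing pending: body already present
      return !F.Materialize(&Err);  // Materialize returns true on error
    }

This is exactly the pattern FunctionPassManager::run adopts further down in PassManager.cpp.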
return true; } @@ -1490,7 +1490,7 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2, const VectorType *MaskTy = dyn_cast<VectorType>(Mask->getType()); if (!isa<Constant>(Mask) || MaskTy == 0 || - !MaskTy->getElementType()->isInteger(32)) + !MaskTy->getElementType()->isIntegerTy(32)) return false; return true; } @@ -1632,7 +1632,7 @@ const Type* ExtractValueInst::getIndexedType(const Type *Agg, static BinaryOperator::BinaryOps AdjustIType(BinaryOperator::BinaryOps iType, const Type *Ty) { // API compatibility: Adjust integer opcodes to floating-point opcodes. - if (Ty->isFPOrFPVector()) { + if (Ty->isFPOrFPVectorTy()) { if (iType == BinaryOperator::Add) iType = BinaryOperator::FAdd; else if (iType == BinaryOperator::Sub) iType = BinaryOperator::FSub; else if (iType == BinaryOperator::Mul) iType = BinaryOperator::FMul; @@ -1678,14 +1678,14 @@ void BinaryOperator::init(BinaryOps iType) { case Mul: assert(getType() == LHS->getType() && "Arithmetic operation should return same type as operands!"); - assert(getType()->isIntOrIntVector() && + assert(getType()->isIntOrIntVectorTy() && "Tried to create an integer operation on a non-integer type!"); break; case FAdd: case FSub: case FMul: assert(getType() == LHS->getType() && "Arithmetic operation should return same type as operands!"); - assert(getType()->isFPOrFPVector() && + assert(getType()->isFPOrFPVectorTy() && "Tried to create a floating-point operation on a " "non-floating-point type!"); break; @@ -1693,28 +1693,28 @@ void BinaryOperator::init(BinaryOps iType) { case SDiv: assert(getType() == LHS->getType() && "Arithmetic operation should return same type as operands!"); - assert((getType()->isInteger() || (isa<VectorType>(getType()) && - cast<VectorType>(getType())->getElementType()->isInteger())) && + assert((getType()->isIntegerTy() || (isa<VectorType>(getType()) && + cast<VectorType>(getType())->getElementType()->isIntegerTy())) && "Incorrect operand type (not integer) for S/UDIV"); break; case FDiv: assert(getType() == LHS->getType() && "Arithmetic operation should return same type as operands!"); - assert(getType()->isFPOrFPVector() && + assert(getType()->isFPOrFPVectorTy() && "Incorrect operand type (not floating point) for FDIV"); break; case URem: case SRem: assert(getType() == LHS->getType() && "Arithmetic operation should return same type as operands!"); - assert((getType()->isInteger() || (isa<VectorType>(getType()) && - cast<VectorType>(getType())->getElementType()->isInteger())) && + assert((getType()->isIntegerTy() || (isa<VectorType>(getType()) && + cast<VectorType>(getType())->getElementType()->isIntegerTy())) && "Incorrect operand type (not integer) for S/UREM"); break; case FRem: assert(getType() == LHS->getType() && "Arithmetic operation should return same type as operands!"); - assert(getType()->isFPOrFPVector() && + assert(getType()->isFPOrFPVectorTy() && "Incorrect operand type (not floating point) for FREM"); break; case Shl: @@ -1722,18 +1722,18 @@ void BinaryOperator::init(BinaryOps iType) { case AShr: assert(getType() == LHS->getType() && "Shift operation should return same type as operands!"); - assert((getType()->isInteger() || + assert((getType()->isIntegerTy() || (isa<VectorType>(getType()) && - cast<VectorType>(getType())->getElementType()->isInteger())) && + cast<VectorType>(getType())->getElementType()->isIntegerTy())) && "Tried to create a shift operation on a non-integral type!"); break; case And: case Or: case Xor: assert(getType() == LHS->getType() && "Logical operation should 
return same type as operands!"); - assert((getType()->isInteger() || + assert((getType()->isIntegerTy() || (isa<VectorType>(getType()) && - cast<VectorType>(getType())->getElementType()->isInteger())) && + cast<VectorType>(getType())->getElementType()->isIntegerTy())) && "Tried to create a logical operation on a non-integral type!"); break; default: @@ -1786,6 +1786,18 @@ BinaryOperator *BinaryOperator::CreateNSWNeg(Value *Op, const Twine &Name, return BinaryOperator::CreateNSWSub(zero, Op, Name, InsertAtEnd); } +BinaryOperator *BinaryOperator::CreateNUWNeg(Value *Op, const Twine &Name, + Instruction *InsertBefore) { + Value *zero = ConstantFP::getZeroValueForNegation(Op->getType()); + return BinaryOperator::CreateNUWSub(zero, Op, Name, InsertBefore); +} + +BinaryOperator *BinaryOperator::CreateNUWNeg(Value *Op, const Twine &Name, + BasicBlock *InsertAtEnd) { + Value *zero = ConstantFP::getZeroValueForNegation(Op->getType()); + return BinaryOperator::CreateNUWSub(zero, Op, Name, InsertAtEnd); +} + BinaryOperator *BinaryOperator::CreateFNeg(Value *Op, const Twine &Name, Instruction *InsertBefore) { Value *zero = ConstantFP::getZeroValueForNegation(Op->getType()); @@ -1948,7 +1960,8 @@ bool CastInst::isIntegerCast() const { case Instruction::Trunc: return true; case Instruction::BitCast: - return getOperand(0)->getType()->isInteger() && getType()->isInteger(); + return getOperand(0)->getType()->isIntegerTy() && + getType()->isIntegerTy(); } } @@ -2081,25 +2094,25 @@ unsigned CastInst::isEliminableCastPair( // no-op cast in second op implies firstOp as long as the DestTy // is integer and we are not converting between a vector and a // non vector type. - if (!isa<VectorType>(SrcTy) && DstTy->isInteger()) + if (!isa<VectorType>(SrcTy) && DstTy->isIntegerTy()) return firstOp; return 0; case 4: // no-op cast in second op implies firstOp as long as the DestTy // is floating point. - if (DstTy->isFloatingPoint()) + if (DstTy->isFloatingPointTy()) return firstOp; return 0; case 5: // no-op cast in first op implies secondOp as long as the SrcTy // is an integer. - if (SrcTy->isInteger()) + if (SrcTy->isIntegerTy()) return secondOp; return 0; case 6: // no-op cast in first op implies secondOp as long as the SrcTy // is a floating point. 
- if (SrcTy->isFloatingPoint()) + if (SrcTy->isFloatingPointTy()) return secondOp; return 0; case 7: { @@ -2262,10 +2275,10 @@ CastInst *CastInst::CreatePointerCast(Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd) { assert(isa<PointerType>(S->getType()) && "Invalid cast"); - assert((Ty->isInteger() || isa<PointerType>(Ty)) && + assert((Ty->isIntegerTy() || isa<PointerType>(Ty)) && "Invalid cast"); - if (Ty->isInteger()) + if (Ty->isIntegerTy()) return Create(Instruction::PtrToInt, S, Ty, Name, InsertAtEnd); return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd); } @@ -2275,10 +2288,10 @@ CastInst *CastInst::CreatePointerCast(Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore) { assert(isa<PointerType>(S->getType()) && "Invalid cast"); - assert((Ty->isInteger() || isa<PointerType>(Ty)) && + assert((Ty->isIntegerTy() || isa<PointerType>(Ty)) && "Invalid cast"); - if (Ty->isInteger()) + if (Ty->isIntegerTy()) return Create(Instruction::PtrToInt, S, Ty, Name, InsertBefore); return Create(Instruction::BitCast, S, Ty, Name, InsertBefore); } @@ -2286,7 +2299,7 @@ CastInst *CastInst::CreatePointerCast(Value *S, const Type *Ty, CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty, bool isSigned, const Twine &Name, Instruction *InsertBefore) { - assert(C->getType()->isIntOrIntVector() && Ty->isIntOrIntVector() && + assert(C->getType()->isIntOrIntVectorTy() && Ty->isIntOrIntVectorTy() && "Invalid integer cast"); unsigned SrcBits = C->getType()->getScalarSizeInBits(); unsigned DstBits = Ty->getScalarSizeInBits(); @@ -2300,7 +2313,7 @@ CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty, CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty, bool isSigned, const Twine &Name, BasicBlock *InsertAtEnd) { - assert(C->getType()->isIntOrIntVector() && Ty->isIntOrIntVector() && + assert(C->getType()->isIntOrIntVectorTy() && Ty->isIntOrIntVectorTy() && "Invalid cast"); unsigned SrcBits = C->getType()->getScalarSizeInBits(); unsigned DstBits = Ty->getScalarSizeInBits(); @@ -2314,7 +2327,7 @@ CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty, CastInst *CastInst::CreateFPCast(Value *C, const Type *Ty, const Twine &Name, Instruction *InsertBefore) { - assert(C->getType()->isFPOrFPVector() && Ty->isFPOrFPVector() && + assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() && "Invalid cast"); unsigned SrcBits = C->getType()->getScalarSizeInBits(); unsigned DstBits = Ty->getScalarSizeInBits(); @@ -2327,7 +2340,7 @@ CastInst *CastInst::CreateFPCast(Value *C, const Type *Ty, CastInst *CastInst::CreateFPCast(Value *C, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd) { - assert(C->getType()->isFPOrFPVector() && Ty->isFPOrFPVector() && + assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() && "Invalid cast"); unsigned SrcBits = C->getType()->getScalarSizeInBits(); unsigned DstBits = Ty->getScalarSizeInBits(); @@ -2351,10 +2364,10 @@ bool CastInst::isCastable(const Type *SrcTy, const Type *DestTy) { unsigned DestBits = DestTy->getScalarSizeInBits(); // 0 for ptr // Run through the possibilities ... 
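isCastable and getCastOpcode between them encode the full cast matrix (int, FP, vector, pointer) that the renamed predicates feed. The usual client pattern asks for the opcode, then materializes the cast; a sketch under those two calls (function name illustrative):

    #include "llvm/InstrTypes.h"
    using namespace llvm;

    Value *castTo(Value *V, const Type *DestTy, bool SrcSigned, bool DstSigned,
                  Instruction *InsertBefore) {
      assert(CastInst::isCastable(V->getType(), DestTy) && "no legal cast");
      Instruction::CastOps Opc =
          CastInst::getCastOpcode(V, SrcSigned, DestTy, DstSigned);
      return CastInst::Create(Opc, V, DestTy, "cast", InsertBefore);
    }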
- if (DestTy->isInteger()) { // Casting to integral - if (SrcTy->isInteger()) { // Casting from integral + if (DestTy->isIntegerTy()) { // Casting to integral + if (SrcTy->isIntegerTy()) { // Casting from integral return true; - } else if (SrcTy->isFloatingPoint()) { // Casting from floating pt + } else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt return true; } else if (const VectorType *PTy = dyn_cast<VectorType>(SrcTy)) { // Casting from vector @@ -2362,10 +2375,10 @@ bool CastInst::isCastable(const Type *SrcTy, const Type *DestTy) { } else { // Casting from something else return isa<PointerType>(SrcTy); } - } else if (DestTy->isFloatingPoint()) { // Casting to floating pt - if (SrcTy->isInteger()) { // Casting from integral + } else if (DestTy->isFloatingPointTy()) { // Casting to floating pt + if (SrcTy->isIntegerTy()) { // Casting from integral return true; - } else if (SrcTy->isFloatingPoint()) { // Casting from floating pt + } else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt return true; } else if (const VectorType *PTy = dyn_cast<VectorType>(SrcTy)) { // Casting from vector @@ -2384,7 +2397,7 @@ bool CastInst::isCastable(const Type *SrcTy, const Type *DestTy) { } else if (isa<PointerType>(DestTy)) { // Casting to pointer if (isa<PointerType>(SrcTy)) { // Casting from pointer return true; - } else if (SrcTy->isInteger()) { // Casting from integral + } else if (SrcTy->isIntegerTy()) { // Casting from integral return true; } else { // Casting from something else return false; @@ -2413,8 +2426,8 @@ CastInst::getCastOpcode( "Only first class types are castable!"); // Run through the possibilities ... - if (DestTy->isInteger()) { // Casting to integral - if (SrcTy->isInteger()) { // Casting from integral + if (DestTy->isIntegerTy()) { // Casting to integral + if (SrcTy->isIntegerTy()) { // Casting from integral if (DestBits < SrcBits) return Trunc; // int -> smaller int else if (DestBits > SrcBits) { // its an extension @@ -2425,7 +2438,7 @@ CastInst::getCastOpcode( } else { return BitCast; // Same size, No-op cast } - } else if (SrcTy->isFloatingPoint()) { // Casting from floating pt + } else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt if (DestIsSigned) return FPToSI; // FP -> sint else @@ -2440,13 +2453,13 @@ CastInst::getCastOpcode( "Casting from a value that is not first-class type"); return PtrToInt; // ptr -> int } - } else if (DestTy->isFloatingPoint()) { // Casting to floating pt - if (SrcTy->isInteger()) { // Casting from integral + } else if (DestTy->isFloatingPointTy()) { // Casting to floating pt + if (SrcTy->isIntegerTy()) { // Casting from integral if (SrcIsSigned) return SIToFP; // sint -> FP else return UIToFP; // uint -> FP - } else if (SrcTy->isFloatingPoint()) { // Casting from floating pt + } else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt if (DestBits < SrcBits) { return FPTrunc; // FP -> smaller FP } else if (DestBits > SrcBits) { @@ -2476,7 +2489,7 @@ CastInst::getCastOpcode( } else if (isa<PointerType>(DestTy)) { if (isa<PointerType>(SrcTy)) { return BitCast; // ptr -> ptr - } else if (SrcTy->isInteger()) { + } else if (SrcTy->isIntegerTy()) { return IntToPtr; // int -> ptr } else { assert(!"Casting pointer to other than pointer or int"); @@ -2504,7 +2517,8 @@ CastInst::castIsValid(Instruction::CastOps op, Value *S, const Type *DstTy) { // Check for type sanity on the arguments const Type *SrcTy = S->getType(); - if (!SrcTy->isFirstClassType() || !DstTy->isFirstClassType()) + if 
(!SrcTy->isFirstClassType() || !DstTy->isFirstClassType() || + SrcTy->isAggregateType() || DstTy->isAggregateType()) return false; // Get the size of the types in bits, we'll need this later @@ -2515,46 +2529,46 @@ CastInst::castIsValid(Instruction::CastOps op, Value *S, const Type *DstTy) { switch (op) { default: return false; // This is an input error case Instruction::Trunc: - return SrcTy->isIntOrIntVector() && - DstTy->isIntOrIntVector()&& SrcBitSize > DstBitSize; + return SrcTy->isIntOrIntVectorTy() && + DstTy->isIntOrIntVectorTy()&& SrcBitSize > DstBitSize; case Instruction::ZExt: - return SrcTy->isIntOrIntVector() && - DstTy->isIntOrIntVector()&& SrcBitSize < DstBitSize; + return SrcTy->isIntOrIntVectorTy() && + DstTy->isIntOrIntVectorTy()&& SrcBitSize < DstBitSize; case Instruction::SExt: - return SrcTy->isIntOrIntVector() && - DstTy->isIntOrIntVector()&& SrcBitSize < DstBitSize; + return SrcTy->isIntOrIntVectorTy() && + DstTy->isIntOrIntVectorTy()&& SrcBitSize < DstBitSize; case Instruction::FPTrunc: - return SrcTy->isFPOrFPVector() && - DstTy->isFPOrFPVector() && + return SrcTy->isFPOrFPVectorTy() && + DstTy->isFPOrFPVectorTy() && SrcBitSize > DstBitSize; case Instruction::FPExt: - return SrcTy->isFPOrFPVector() && - DstTy->isFPOrFPVector() && + return SrcTy->isFPOrFPVectorTy() && + DstTy->isFPOrFPVectorTy() && SrcBitSize < DstBitSize; case Instruction::UIToFP: case Instruction::SIToFP: if (const VectorType *SVTy = dyn_cast<VectorType>(SrcTy)) { if (const VectorType *DVTy = dyn_cast<VectorType>(DstTy)) { - return SVTy->getElementType()->isIntOrIntVector() && - DVTy->getElementType()->isFPOrFPVector() && + return SVTy->getElementType()->isIntOrIntVectorTy() && + DVTy->getElementType()->isFPOrFPVectorTy() && SVTy->getNumElements() == DVTy->getNumElements(); } } - return SrcTy->isIntOrIntVector() && DstTy->isFPOrFPVector(); + return SrcTy->isIntOrIntVectorTy() && DstTy->isFPOrFPVectorTy(); case Instruction::FPToUI: case Instruction::FPToSI: if (const VectorType *SVTy = dyn_cast<VectorType>(SrcTy)) { if (const VectorType *DVTy = dyn_cast<VectorType>(DstTy)) { - return SVTy->getElementType()->isFPOrFPVector() && - DVTy->getElementType()->isIntOrIntVector() && + return SVTy->getElementType()->isFPOrFPVectorTy() && + DVTy->getElementType()->isIntOrIntVectorTy() && SVTy->getNumElements() == DVTy->getNumElements(); } } - return SrcTy->isFPOrFPVector() && DstTy->isIntOrIntVector(); + return SrcTy->isFPOrFPVectorTy() && DstTy->isIntOrIntVectorTy(); case Instruction::PtrToInt: - return isa<PointerType>(SrcTy) && DstTy->isInteger(); + return isa<PointerType>(SrcTy) && DstTy->isIntegerTy(); case Instruction::IntToPtr: - return SrcTy->isInteger() && isa<PointerType>(DstTy); + return SrcTy->isIntegerTy() && isa<PointerType>(DstTy); case Instruction::BitCast: // BitCast implies a no-op cast of type only. No bits change. // However, you can't cast pointers to anything but pointers. @@ -2865,25 +2879,53 @@ ICmpInst::makeConstantRange(Predicate pred, const APInt &C) { default: llvm_unreachable("Invalid ICmp opcode to ConstantRange ctor!"); case ICmpInst::ICMP_EQ: Upper++; break; case ICmpInst::ICMP_NE: Lower++; break; - case ICmpInst::ICMP_ULT: Lower = APInt::getMinValue(BitWidth); break; - case ICmpInst::ICMP_SLT: Lower = APInt::getSignedMinValue(BitWidth); break; + case ICmpInst::ICMP_ULT: + Lower = APInt::getMinValue(BitWidth); + // Check for an empty-set condition. 
+ if (Lower == Upper) + return ConstantRange(BitWidth, /*isFullSet=*/false); + break; + case ICmpInst::ICMP_SLT: + Lower = APInt::getSignedMinValue(BitWidth); + // Check for an empty-set condition. + if (Lower == Upper) + return ConstantRange(BitWidth, /*isFullSet=*/false); + break; case ICmpInst::ICMP_UGT: Lower++; Upper = APInt::getMinValue(BitWidth); // Min = Next(Max) + // Check for an empty-set condition. + if (Lower == Upper) + return ConstantRange(BitWidth, /*isFullSet=*/false); break; case ICmpInst::ICMP_SGT: Lower++; Upper = APInt::getSignedMinValue(BitWidth); // Min = Next(Max) + // Check for an empty-set condition. + if (Lower == Upper) + return ConstantRange(BitWidth, /*isFullSet=*/false); break; case ICmpInst::ICMP_ULE: Lower = APInt::getMinValue(BitWidth); Upper++; + // Check for a full-set condition. + if (Lower == Upper) + return ConstantRange(BitWidth, /*isFullSet=*/true); break; case ICmpInst::ICMP_SLE: Lower = APInt::getSignedMinValue(BitWidth); Upper++; + // Check for a full-set condition. + if (Lower == Upper) + return ConstantRange(BitWidth, /*isFullSet=*/true); break; case ICmpInst::ICMP_UGE: Upper = APInt::getMinValue(BitWidth); // Min = Next(Max) + // Check for a full-set condition. + if (Lower == Upper) + return ConstantRange(BitWidth, /*isFullSet=*/true); break; case ICmpInst::ICMP_SGE: Upper = APInt::getSignedMinValue(BitWidth); // Min = Next(Max) + // Check for a full-set condition. + if (Lower == Upper) + return ConstantRange(BitWidth, /*isFullSet=*/true); break; } return ConstantRange(Lower, Upper); diff --git a/lib/VMCore/LLVMContextImpl.h b/lib/VMCore/LLVMContextImpl.h index ccca789..62491d8 100644 --- a/lib/VMCore/LLVMContextImpl.h +++ b/lib/VMCore/LLVMContextImpl.h @@ -116,6 +116,10 @@ public: ConstantStruct, true /*largekey*/> StructConstantsTy; StructConstantsTy StructConstants; + typedef ConstantUniqueMap<Constant*, UnionType, ConstantUnion> + UnionConstantsTy; + UnionConstantsTy UnionConstants; + typedef ConstantUniqueMap<std::vector<Constant*>, VectorType, ConstantVector> VectorConstantsTy; VectorConstantsTy VectorConstants; @@ -159,12 +163,16 @@ public: TypeMap<PointerValType, PointerType> PointerTypes; TypeMap<FunctionValType, FunctionType> FunctionTypes; TypeMap<StructValType, StructType> StructTypes; + TypeMap<UnionValType, UnionType> UnionTypes; TypeMap<IntegerValType, IntegerType> IntegerTypes; // Opaque types are not structurally uniqued, so don't use TypeMap. typedef SmallPtrSet<const OpaqueType*, 8> OpaqueTypesTy; OpaqueTypesTy OpaqueTypes; - + + /// Used as an abstract type that will never be resolved. + OpaqueType *const AlwaysOpaqueTy; + /// ValueHandles - This map keeps track of all of the value handles that are /// watching a Value*. The Value::HasValueHandle bit is used to know @@ -196,7 +204,12 @@ public: Int8Ty(C, 8), Int16Ty(C, 16), Int32Ty(C, 32), - Int64Ty(C, 64) { } + Int64Ty(C, 64), + AlwaysOpaqueTy(new OpaqueType(C)) { + // Make sure the AlwaysOpaqueTy stays alive as long as the Context. + AlwaysOpaqueTy->addRef(); + OpaqueTypes.insert(AlwaysOpaqueTy); + } ~LLVMContextImpl() { ExprConstants.freeConstants(); @@ -217,6 +230,7 @@ public: delete I->second; } MDNodeSet.clear(); + AlwaysOpaqueTy->dropRef(); for (OpaqueTypesTy::iterator I = OpaqueTypes.begin(), E = OpaqueTypes.end(); I != E; ++I) { (*I)->AbstractTypeUsers.clear(); diff --git a/lib/VMCore/Makefile b/lib/VMCore/Makefile index ecadaee..bc5e77d 100644 --- a/lib/VMCore/Makefile +++ b/lib/VMCore/Makefile @@ -9,7 +9,7 @@ LEVEL = ../.. 
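Back in the Instructions.cpp hunk above, the point of the new guards in makeConstantRange: a degenerate predicate such as x u< 0 (never true) or x u<= ~0 (always true) drives Lower and Upper to the same value, and the half-open [Lower, Upper) representation cannot tell an empty set apart from a full one, so the explicit empty- or full-set constructor is returned instead. A hedged illustration of both degenerate cases over i32:

    #include "llvm/Instructions.h"
    #include "llvm/Support/ConstantRange.h"
    #include "llvm/ADT/APInt.h"
    #include <cassert>
    using namespace llvm;

    void constantRangeCorners() {
      // x u< 0: no value satisfies it -> explicit empty set.
      ConstantRange Empty = ICmpInst::makeConstantRange(
          ICmpInst::ICMP_ULT, APInt::getMinValue(32));
      assert(Empty.isEmptySet());

      // x u<= 0xffffffff: every value satisfies it -> explicit full set.
      ConstantRange Full = ICmpInst::makeConstantRange(
          ICmpInst::ICMP_ULE, APInt::getMaxValue(32));
      assert(Full.isFullSet());
    }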
LIBRARYNAME = LLVMCore BUILD_ARCHIVE = 1 -#CXXFLAGS = -fno-rtti +REQUIRES_RTTI = 1 BUILT_SOURCES = $(PROJ_OBJ_ROOT)/include/llvm/Intrinsics.gen diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp index ee8e713..07a5f3c 100644 --- a/lib/VMCore/Metadata.cpp +++ b/lib/VMCore/Metadata.cpp @@ -186,43 +186,50 @@ void MDNode::destroy() { } MDNode *MDNode::getMDNode(LLVMContext &Context, Value *const *Vals, - unsigned NumVals, FunctionLocalness FL) { + unsigned NumVals, FunctionLocalness FL, + bool Insert) { LLVMContextImpl *pImpl = Context.pImpl; FoldingSetNodeID ID; for (unsigned i = 0; i != NumVals; ++i) ID.AddPointer(Vals[i]); void *InsertPoint; - MDNode *N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint); - if (!N) { - bool isFunctionLocal = false; - switch (FL) { - case FL_Unknown: - for (unsigned i = 0; i != NumVals; ++i) { - Value *V = Vals[i]; - if (!V) continue; - if (isa<Instruction>(V) || isa<Argument>(V) || isa<BasicBlock>(V) || - (isa<MDNode>(V) && cast<MDNode>(V)->isFunctionLocal())) { - isFunctionLocal = true; - break; - } + MDNode *N = NULL; + + if ((N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint))) + return N; + + if (!Insert) + return NULL; + + bool isFunctionLocal = false; + switch (FL) { + case FL_Unknown: + for (unsigned i = 0; i != NumVals; ++i) { + Value *V = Vals[i]; + if (!V) continue; + if (isa<Instruction>(V) || isa<Argument>(V) || isa<BasicBlock>(V) || + (isa<MDNode>(V) && cast<MDNode>(V)->isFunctionLocal())) { + isFunctionLocal = true; + break; } - break; - case FL_No: - isFunctionLocal = false; - break; - case FL_Yes: - isFunctionLocal = true; - break; } + break; + case FL_No: + isFunctionLocal = false; + break; + case FL_Yes: + isFunctionLocal = true; + break; + } - // Coallocate space for the node and Operands together, then placement new. - void *Ptr = malloc(sizeof(MDNode)+NumVals*sizeof(MDNodeOperand)); - N = new (Ptr) MDNode(Context, Vals, NumVals, isFunctionLocal); + // Coallocate space for the node and Operands together, then placement new. + void *Ptr = malloc(sizeof(MDNode)+NumVals*sizeof(MDNodeOperand)); + N = new (Ptr) MDNode(Context, Vals, NumVals, isFunctionLocal); + + // InsertPoint will have been set by the FindNodeOrInsertPos call. + pImpl->MDNodeSet.InsertNode(N, InsertPoint); - // InsertPoint will have been set by the FindNodeOrInsertPos call. - pImpl->MDNodeSet.InsertNode(N, InsertPoint); - } return N; } @@ -230,11 +237,16 @@ MDNode *MDNode::get(LLVMContext &Context, Value*const* Vals, unsigned NumVals) { return getMDNode(Context, Vals, NumVals, FL_Unknown); } -MDNode *MDNode::getWhenValsUnresolved(LLVMContext &Context, Value*const* Vals, +MDNode *MDNode::getWhenValsUnresolved(LLVMContext &Context, Value *const *Vals, unsigned NumVals, bool isFunctionLocal) { return getMDNode(Context, Vals, NumVals, isFunctionLocal ? FL_Yes : FL_No); } +MDNode *MDNode::getIfExists(LLVMContext &Context, Value *const *Vals, + unsigned NumVals) { + return getMDNode(Context, Vals, NumVals, FL_Unknown, false); +} + /// getOperand - Return specified operand. 
Value *MDNode::getOperand(unsigned i) const { return *getOperandPtr(const_cast<MDNode*>(this), i); diff --git a/lib/VMCore/Module.cpp b/lib/VMCore/Module.cpp index 503e708..001bb00 100644 --- a/lib/VMCore/Module.cpp +++ b/lib/VMCore/Module.cpp @@ -15,6 +15,7 @@ #include "llvm/InstrTypes.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" +#include "llvm/GVMaterializer.h" #include "llvm/LLVMContext.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" @@ -56,7 +57,7 @@ template class llvm::SymbolTableListTraits<GlobalAlias, Module>; // Module::Module(StringRef MID, LLVMContext& C) - : Context(C), ModuleID(MID), DataLayout("") { + : Context(C), Materializer(NULL), ModuleID(MID), DataLayout("") { ValSymTab = new ValueSymbolTable(); TypeSymTab = new TypeSymbolTable(); NamedMDSymTab = new MDSymbolTable(); @@ -372,6 +373,52 @@ std::string Module::getTypeName(const Type *Ty) const { } //===----------------------------------------------------------------------===// +// Methods to control the materialization of GlobalValues in the Module. +// +void Module::setMaterializer(GVMaterializer *GVM) { + assert(!Materializer && + "Module already has a GVMaterializer. Call MaterializeAllPermanently" + " to clear it out before setting another one."); + Materializer.reset(GVM); +} + +bool Module::isMaterializable(const GlobalValue *GV) const { + if (Materializer) + return Materializer->isMaterializable(GV); + return false; +} + +bool Module::isDematerializable(const GlobalValue *GV) const { + if (Materializer) + return Materializer->isDematerializable(GV); + return false; +} + +bool Module::Materialize(GlobalValue *GV, std::string *ErrInfo) { + if (Materializer) + return Materializer->Materialize(GV, ErrInfo); + return false; +} + +void Module::Dematerialize(GlobalValue *GV) { + if (Materializer) + return Materializer->Dematerialize(GV); +} + +bool Module::MaterializeAll(std::string *ErrInfo) { + if (!Materializer) + return false; + return Materializer->MaterializeModule(this, ErrInfo); +} + +bool Module::MaterializeAllPermanently(std::string *ErrInfo) { + if (MaterializeAll(ErrInfo)) + return true; + Materializer.reset(); + return false; +} + +//===----------------------------------------------------------------------===// // Other module related stuff. // diff --git a/lib/VMCore/ModuleProvider.cpp b/lib/VMCore/ModuleProvider.cpp deleted file mode 100644 index cfff97c..0000000 --- a/lib/VMCore/ModuleProvider.cpp +++ /dev/null @@ -1,26 +0,0 @@ -//===-- ModuleProvider.cpp - Base implementation for module providers -----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Minimal implementation of the abstract interface for providing a module. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/ModuleProvider.h" -#include "llvm/Module.h" -using namespace llvm; - -/// ctor - always have a valid Module -/// -ModuleProvider::ModuleProvider() : TheModule(0) { } - -/// dtor - when we leave, we take our Module with us -/// -ModuleProvider::~ModuleProvider() { - delete TheModule; -} diff --git a/lib/VMCore/Pass.cpp b/lib/VMCore/Pass.cpp index 45000f2..a782e5a 100644 --- a/lib/VMCore/Pass.cpp +++ b/lib/VMCore/Pass.cpp @@ -16,7 +16,6 @@ #include "llvm/Pass.h" #include "llvm/PassManager.h" #include "llvm/Module.h" -#include "llvm/ModuleProvider.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/Support/Debug.h" @@ -195,6 +194,9 @@ PassManagerType BasicBlockPass::getPotentialPassManagerType() const { // namespace { class PassRegistrar { + /// Guards the contents of this class. + mutable sys::SmartMutex<true> Lock; + /// PassInfoMap - Keep track of the passinfo object for each registered llvm /// pass. typedef std::map<intptr_t, const PassInfo*> MapType; @@ -214,16 +216,19 @@ class PassRegistrar { public: const PassInfo *GetPassInfo(intptr_t TI) const { + sys::SmartScopedLock<true> Guard(Lock); MapType::const_iterator I = PassInfoMap.find(TI); return I != PassInfoMap.end() ? I->second : 0; } const PassInfo *GetPassInfo(StringRef Arg) const { + sys::SmartScopedLock<true> Guard(Lock); StringMapType::const_iterator I = PassInfoStringMap.find(Arg); return I != PassInfoStringMap.end() ? I->second : 0; } void RegisterPass(const PassInfo &PI) { + sys::SmartScopedLock<true> Guard(Lock); bool Inserted = PassInfoMap.insert(std::make_pair(PI.getTypeInfo(),&PI)).second; assert(Inserted && "Pass registered multiple times!"); Inserted=Inserted; @@ -231,6 +236,7 @@ public: } void UnregisterPass(const PassInfo &PI) { + sys::SmartScopedLock<true> Guard(Lock); MapType::iterator I = PassInfoMap.find(PI.getTypeInfo()); assert(I != PassInfoMap.end() && "Pass registered but not in map!"); @@ -240,6 +246,7 @@ public: } void EnumerateWith(PassRegistrationListener *L) { + sys::SmartScopedLock<true> Guard(Lock); for (MapType::const_iterator I = PassInfoMap.begin(), E = PassInfoMap.end(); I != E; ++I) L->passEnumerate(I->second); @@ -250,6 +257,7 @@ public: void RegisterAnalysisGroup(PassInfo *InterfaceInfo, const PassInfo *ImplementationInfo, bool isDefault) { + sys::SmartScopedLock<true> Guard(Lock); AnalysisGroupInfo &AGI = AnalysisGroupInfoMap[InterfaceInfo]; assert(AGI.Implementations.count(ImplementationInfo) == 0 && "Cannot add a pass to the same analysis group more than once!"); diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp index 0c0d64e..8a3527e 100644 --- a/lib/VMCore/PassManager.cpp +++ b/lib/VMCore/PassManager.cpp @@ -18,7 +18,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/Timer.h" #include "llvm/Module.h" -#include "llvm/ModuleProvider.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/raw_ostream.h" @@ -1194,15 +1193,13 @@ bool BBPassManager::doFinalization(Function &F) { // FunctionPassManager implementation /// Create new Function pass manager -FunctionPassManager::FunctionPassManager(ModuleProvider *P) { +FunctionPassManager::FunctionPassManager(Module *m) : M(m) { FPM = new FunctionPassManagerImpl(0); // FPM is the top level manager. 
FPM->setTopLevelManager(FPM); AnalysisResolver *AR = new AnalysisResolver(*FPM); FPM->setResolver(AR); - - MP = P; } FunctionPassManager::~FunctionPassManager() { @@ -1223,9 +1220,11 @@ void FunctionPassManager::add(Pass *P) { /// so, return true. /// bool FunctionPassManager::run(Function &F) { - std::string errstr; - if (MP->materializeFunction(&F, &errstr)) { - llvm_report_error("Error reading bitcode file: " + errstr); + if (F.isMaterializable()) { + std::string errstr; + if (F.Materialize(&errstr)) { + llvm_report_error("Error reading bitcode file: " + errstr); + } } return FPM->run(F); } @@ -1234,13 +1233,13 @@ bool FunctionPassManager::run(Function &F) { /// doInitialization - Run all of the initializers for the function passes. /// bool FunctionPassManager::doInitialization() { - return FPM->doInitialization(*MP->getModule()); + return FPM->doInitialization(*M); } /// doFinalization - Run all of the finalizers for the function passes. /// bool FunctionPassManager::doFinalization() { - return FPM->doFinalization(*MP->getModule()); + return FPM->doFinalization(*M); } //===----------------------------------------------------------------------===// diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp index 044de4f..f4cd366 100644 --- a/lib/VMCore/Type.cpp +++ b/lib/VMCore/Type.cpp @@ -50,8 +50,8 @@ void AbstractTypeUser::setType(Value *V, const Type *NewTy) { /// Because of the way Type subclasses are allocated, this function is necessary /// to use the correct kind of "delete" operator to deallocate the Type object. -/// Some type objects (FunctionTy, StructTy) allocate additional space after -/// the space for their derived type to hold the contained types array of +/// Some type objects (FunctionTy, StructTy, UnionTy) allocate additional space +/// after the space for their derived type to hold the contained types array of /// PATypeHandles. Using this allocation scheme means all the PATypeHandles are /// allocated with the type object, decreasing allocations and eliminating the /// need for a std::vector to be used in the Type class itself. @@ -61,7 +61,8 @@ void Type::destroy() const { // Structures and Functions allocate their contained types past the end of // the type object itself. These need to be destroyed differently than the // other types. - if (isa<FunctionType>(this) || isa<StructType>(this)) { + if (isa<FunctionType>(this) || isa<StructType>(this) || + isa<UnionType>(this)) { // First, make sure we destruct any PATypeHandles allocated by these // subclasses. They must be manually destructed. for (unsigned i = 0; i < NumContainedTys; ++i) @@ -71,8 +72,10 @@ void Type::destroy() const { // to delete this as an array of char. if (isa<FunctionType>(this)) static_cast<const FunctionType*>(this)->FunctionType::~FunctionType(); - else + else if (isa<StructType>(this)) static_cast<const StructType*>(this)->StructType::~StructType(); + else + static_cast<const UnionType*>(this)->UnionType::~UnionType(); // Finally, remove the memory as an array deallocation of the chars it was // constructed from. @@ -124,32 +127,32 @@ const Type *Type::getScalarType() const { return this; } -/// isInteger - Return true if this is an IntegerType of the specified width. -bool Type::isInteger(unsigned Bitwidth) const { - return isInteger() && cast<IntegerType>(this)->getBitWidth() == Bitwidth; +/// isIntegerTy - Return true if this is an IntegerType of the specified width. 
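With ModuleProvider gone, FunctionPassManager above is built straight from a Module, and run() asks the Function itself whether it is materializable before executing passes on it. A migration sketch (the driver runOnAllFunctions is hypothetical):

    #include "llvm/Module.h"
    #include "llvm/PassManager.h"

    void runOnAllFunctions(llvm::Module *M, llvm::Pass *P) {
      llvm::FunctionPassManager FPM(M);  // was: FunctionPassManager(new ExistingModuleProvider(M))
      FPM.add(P);
      FPM.doInitialization();
      for (llvm::Module::iterator F = M->begin(), E = M->end(); F != E; ++F)
        // Unread lazy bodies still look like declarations, so also accept
        // anything the materializer can supply; run() reads it on demand.
        if (!F->isDeclaration() || F->isMaterializable())
          FPM.run(*F);
      FPM.doFinalization();
    }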
+bool Type::isIntegerTy(unsigned Bitwidth) const { + return isIntegerTy() && cast<IntegerType>(this)->getBitWidth() == Bitwidth; } -/// isIntOrIntVector - Return true if this is an integer type or a vector of +/// isIntOrIntVectorTy - Return true if this is an integer type or a vector of /// integer types. /// -bool Type::isIntOrIntVector() const { - if (isInteger()) +bool Type::isIntOrIntVectorTy() const { + if (isIntegerTy()) return true; if (ID != Type::VectorTyID) return false; - return cast<VectorType>(this)->getElementType()->isInteger(); + return cast<VectorType>(this)->getElementType()->isIntegerTy(); } -/// isFPOrFPVector - Return true if this is a FP type or a vector of FP types. +/// isFPOrFPVectorTy - Return true if this is an FP type or a vector of FP types. /// -bool Type::isFPOrFPVector() const { +bool Type::isFPOrFPVectorTy() const { if (ID == Type::FloatTyID || ID == Type::DoubleTyID || ID == Type::FP128TyID || ID == Type::X86_FP80TyID || ID == Type::PPC_FP128TyID) return true; if (ID != Type::VectorTyID) return false; - return cast<VectorType>(this)->getElementType()->isFloatingPoint(); + return cast<VectorType>(this)->getElementType()->isFloatingPointTy(); } // canLosslesslyBitCastTo - Return true if this type can be converted to @@ -204,7 +207,7 @@ unsigned Type::getScalarSizeInBits() const { int Type::getFPMantissaWidth() const { if (const VectorType *VTy = dyn_cast<VectorType>(this)) return VTy->getElementType()->getFPMantissaWidth(); - assert(isFloatingPoint() && "Not a floating point type!"); + assert(isFloatingPointTy() && "Not a floating point type!"); if (ID == FloatTyID) return 24; if (ID == DoubleTyID) return 53; if (ID == X86_FP80TyID) return 64; @@ -226,7 +229,7 @@ bool Type::isSizedDerivedType() const { if (const VectorType *PTy = dyn_cast<VectorType>(this)) return PTy->getElementType()->isSized(); - if (!isa<StructType>(this)) + if (!isa<StructType>(this) && !isa<UnionType>(this)) return false; // Okay, our struct is sized if all of the elements are... @@ -285,7 +288,7 @@ std::string Type::getDescription() const { bool StructType::indexValid(const Value *V) const { // Structure indexes require 32-bit integer constants. - if (V->getType()->isInteger(32)) + if (V->getType()->isIntegerTy(32)) if (const ConstantInt *CU = dyn_cast<ConstantInt>(V)) return indexValid(CU->getZExtValue()); return false; @@ -308,6 +311,32 @@ const Type *StructType::getTypeAtIndex(unsigned Idx) const { return ContainedTys[Idx]; } + +bool UnionType::indexValid(const Value *V) const { + // Union indexes require 32-bit integer constants. + if (V->getType()->isIntegerTy(32)) + if (const ConstantInt *CU = dyn_cast<ConstantInt>(V)) + return indexValid(CU->getZExtValue()); + return false; +} + +bool UnionType::indexValid(unsigned V) const { + return V < NumContainedTys; +} + +// getTypeAtIndex - Given an index value into the type, return the type of the +// element. For a union type, this must be a constant value...
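The renames above are mechanical: isInteger, isIntOrIntVector, isFloatingPoint and friends gain a Ty suffix. A caller-side sketch of the new spellings (classify is a hypothetical helper):

    #include "llvm/Type.h"

    const char *classify(const llvm::Type *Ty) {
      if (Ty->isIntegerTy(32))        // was: Ty->isInteger(32)
        return "exactly i32";
      if (Ty->isIntOrIntVectorTy())   // was: Ty->isIntOrIntVector()
        return "integer or integer vector";
      if (Ty->isFPOrFPVectorTy())     // was: Ty->isFPOrFPVector()
        return "FP or FP vector";
      return "something else";
    }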
+// +const Type *UnionType::getTypeAtIndex(const Value *V) const { + unsigned Idx = (unsigned)cast<ConstantInt>(V)->getZExtValue(); + return getTypeAtIndex(Idx); +} + +const Type *UnionType::getTypeAtIndex(unsigned Idx) const { + assert(indexValid(Idx) && "Invalid union index!"); + return ContainedTys[Idx]; +} + //===----------------------------------------------------------------------===// // Primitive 'Type' data //===----------------------------------------------------------------------===// @@ -463,6 +492,23 @@ StructType::StructType(LLVMContext &C, setAbstract(isAbstract); } +UnionType::UnionType(LLVMContext &C, const Type* const* Types, unsigned NumTypes) + : CompositeType(C, UnionTyID) { + ContainedTys = reinterpret_cast<PATypeHandle*>(this + 1); + NumContainedTys = NumTypes; + bool isAbstract = false; + for (unsigned i = 0; i < NumTypes; ++i) { + assert(Types[i] && "<null> type for union field!"); + assert(isValidElementType(Types[i]) && + "Invalid type for union element!"); + new (&ContainedTys[i]) PATypeHandle(Types[i], this); + isAbstract |= Types[i]->isAbstract(); + } + + // Calculate whether or not this type is abstract + setAbstract(isAbstract); +} + ArrayType::ArrayType(const Type *ElType, uint64_t NumEl) : SequentialType(ArrayTyID, ElType) { NumElements = NumEl; @@ -507,30 +553,7 @@ void DerivedType::dropAllTypeUses() { if (NumContainedTys != 0) { // The type must stay abstract. To do this, we insert a pointer to a type // that will never get resolved, thus will always be abstract. - static Type *AlwaysOpaqueTy = 0; - static PATypeHolder* Holder = 0; - Type *tmp = AlwaysOpaqueTy; - if (llvm_is_multithreaded()) { - sys::MemoryFence(); - if (!tmp) { - llvm_acquire_global_lock(); - tmp = AlwaysOpaqueTy; - if (!tmp) { - tmp = OpaqueType::get(getContext()); - PATypeHolder* tmp2 = new PATypeHolder(tmp); - sys::MemoryFence(); - AlwaysOpaqueTy = tmp; - Holder = tmp2; - } - - llvm_release_global_lock(); - } - } else if (!AlwaysOpaqueTy) { - AlwaysOpaqueTy = OpaqueType::get(getContext()); - Holder = new PATypeHolder(AlwaysOpaqueTy); - } - - ContainedTys[0] = AlwaysOpaqueTy; + ContainedTys[0] = getContext().pImpl->AlwaysOpaqueTy; // Change the rest of the types to be Int32Ty's. It doesn't matter what we // pick so long as it doesn't point back to this type. We choose something @@ -667,6 +690,13 @@ static bool TypesEqual(const Type *Ty, const Type *Ty2, if (!TypesEqual(STy->getElementType(i), STy2->getElementType(i), EqTypes)) return false; return true; + } else if (const UnionType *UTy = dyn_cast<UnionType>(Ty)) { + const UnionType *UTy2 = cast<UnionType>(Ty2); + if (UTy->getNumElements() != UTy2->getNumElements()) return false; + for (unsigned i = 0, e = UTy2->getNumElements(); i != e; ++i) + if (!TypesEqual(UTy->getElementType(i), UTy2->getElementType(i), EqTypes)) + return false; + return true; } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { const ArrayType *ATy2 = cast<ArrayType>(Ty2); return ATy->getNumElements() == ATy2->getNumElements() && @@ -881,7 +911,7 @@ VectorType *VectorType::get(const Type *ElementType, unsigned NumElements) { } bool VectorType::isValidElementType(const Type *ElemTy) { - return ElemTy->isInteger() || ElemTy->isFloatingPoint() || + return ElemTy->isIntegerTy() || ElemTy->isFloatingPointTy() || isa<OpaqueType>(ElemTy); } @@ -924,10 +954,64 @@ StructType *StructType::get(LLVMContext &Context, const Type *type, ...)
{ } bool StructType::isValidElementType(const Type *ElemTy) { - return ElemTy->getTypeID() != VoidTyID && ElemTy->getTypeID() != LabelTyID && - ElemTy->getTypeID() != MetadataTyID && !isa<FunctionType>(ElemTy); + return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() && + !ElemTy->isMetadataTy() && !isa<FunctionType>(ElemTy); +} + + +//===----------------------------------------------------------------------===// +// Union Type Factory... +// + +UnionType *UnionType::get(const Type* const* Types, unsigned NumTypes) { + assert(NumTypes > 0 && "union must have at least one member type!"); + UnionValType UTV(Types, NumTypes); + UnionType *UT = 0; + + LLVMContextImpl *pImpl = Types[0]->getContext().pImpl; + + UT = pImpl->UnionTypes.get(UTV); + + if (!UT) { + // Value not found. Derive a new type! + UT = (UnionType*) operator new(sizeof(UnionType) + + sizeof(PATypeHandle) * NumTypes); + new (UT) UnionType(Types[0]->getContext(), Types, NumTypes); + pImpl->UnionTypes.add(UTV, UT); + } +#ifdef DEBUG_MERGE_TYPES + DEBUG(dbgs() << "Derived new type: " << *UT << "\n"); +#endif + return UT; +} + +UnionType *UnionType::get(const Type *type, ...) { + va_list ap; + SmallVector<const llvm::Type*, 8> UnionFields; + va_start(ap, type); + while (type) { + UnionFields.push_back(type); + type = va_arg(ap, llvm::Type*); + } + unsigned NumTypes = UnionFields.size(); + assert(NumTypes > 0 && "union must have at least one member type!"); + return llvm::UnionType::get(&UnionFields[0], NumTypes); } +bool UnionType::isValidElementType(const Type *ElemTy) { + return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() && + !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy(); +} + +int UnionType::getElementTypeIndex(const Type *ElemTy) const { + int index = 0; + for (UnionType::element_iterator I = element_begin(), E = element_end(); + I != E; ++I, ++index) { + if (ElemTy == *I) return index; + } + + return -1; +} //===----------------------------------------------------------------------===// // Pointer Type Factory... @@ -1192,6 +1276,21 @@ void StructType::typeBecameConcrete(const DerivedType *AbsTy) { // concrete - this could potentially change us from an abstract type to a // concrete type. // +void UnionType::refineAbstractType(const DerivedType *OldType, + const Type *NewType) { + LLVMContextImpl *pImpl = OldType->getContext().pImpl; + pImpl->UnionTypes.RefineAbstractType(this, OldType, NewType); +} + +void UnionType::typeBecameConcrete(const DerivedType *AbsTy) { + LLVMContextImpl *pImpl = AbsTy->getContext().pImpl; + pImpl->UnionTypes.TypeBecameConcrete(this, AbsTy); +} + +// refineAbstractType - Called when a contained type is found to be more +// concrete - this could potentially change us from an abstract type to a +// concrete type. 
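The factory above interns unions in LLVMContextImpl's UnionTypes map the same way struct types are uniqued, and the variadic overload is NULL-terminated like StructType::get's. A construction sketch (makeIntFloatUnion is hypothetical, and union types were an experimental addition at this point):

    #include "llvm/DerivedTypes.h"
    #include "llvm/LLVMContext.h"
    #include <cassert>

    const llvm::UnionType *makeIntFloatUnion(llvm::LLVMContext &Ctx) {
      llvm::UnionType *UT =
        llvm::UnionType::get(llvm::Type::getInt32Ty(Ctx),
                             llvm::Type::getFloatTy(Ctx),
                             NULL);                       // NULL ends the list
      assert(UT->getNumElements() == 2 && "two members expected");
      // getElementTypeIndex does a linear scan; -1 means "not a member".
      assert(UT->getElementTypeIndex(llvm::Type::getFloatTy(Ctx)) == 1);
      return UT;
    }

Because the map is keyed on the element-type vector (UnionValType below), a second call with the same member types hands back the identical UnionType pointer.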
+// void PointerType::refineAbstractType(const DerivedType *OldType, const Type *NewType) { LLVMContextImpl *pImpl = OldType->getContext().pImpl; diff --git a/lib/VMCore/TypesContext.h b/lib/VMCore/TypesContext.h index 93a801b..02ab113 100644 --- a/lib/VMCore/TypesContext.h +++ b/lib/VMCore/TypesContext.h @@ -68,7 +68,7 @@ static unsigned getSubElementHash(const Type *Ty) { class IntegerValType { uint32_t bits; public: - IntegerValType(uint16_t numbits) : bits(numbits) {} + IntegerValType(uint32_t numbits) : bits(numbits) {} static IntegerValType get(const IntegerType *Ty) { return IntegerValType(Ty->getBitWidth()); @@ -180,6 +180,32 @@ public: } }; +// UnionValType - Define a class to hold the key that goes into the TypeMap +// +class UnionValType { + std::vector<const Type*> ElTypes; +public: + UnionValType(const Type* const* Types, unsigned NumTypes) + : ElTypes(&Types[0], &Types[NumTypes]) {} + + static UnionValType get(const UnionType *UT) { + std::vector<const Type *> ElTypes; + ElTypes.reserve(UT->getNumElements()); + for (unsigned i = 0, e = UT->getNumElements(); i != e; ++i) + ElTypes.push_back(UT->getElementType(i)); + + return UnionValType(&ElTypes[0], ElTypes.size()); + } + + static unsigned hashTypeStructure(const UnionType *UT) { + return UT->getNumElements(); + } + + inline bool operator<(const UnionValType &UTV) const { + return (ElTypes < UTV.ElTypes); + } +}; + // FunctionValType - Define a class to hold the key that goes into the TypeMap // class FunctionValType { @@ -216,7 +242,6 @@ protected: /// std::multimap<unsigned, PATypeHolder> TypesByHash; -public: ~TypeMapBase() { // PATypeHolder won't destroy non-abstract types. // We can't destroy them by simply iterating, because @@ -236,6 +261,7 @@ public: } } +public: void RemoveFromTypesByHash(unsigned Hash, const Type *Ty) { std::multimap<unsigned, PATypeHolder>::iterator I = TypesByHash.lower_bound(Hash); @@ -281,7 +307,6 @@ class TypeMap : public TypeMapBase { std::map<ValType, PATypeHolder> Map; public: typedef typename std::map<ValType, PATypeHolder>::iterator iterator; - ~TypeMap() { print("ON EXIT"); } inline TypeClass *get(const ValType &V) { iterator I = Map.find(V); diff --git a/lib/VMCore/Value.cpp b/lib/VMCore/Value.cpp index 40679bf..3759b8a 100644 --- a/lib/VMCore/Value.cpp +++ b/lib/VMCore/Value.cpp @@ -341,12 +341,11 @@ Value *Value::stripPointerCasts() { } while (1); } -Value *Value::getUnderlyingObject() { +Value *Value::getUnderlyingObject(unsigned MaxLookup) { if (!isa<PointerType>(getType())) return this; Value *V = this; - unsigned MaxLookup = 6; - do { + for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) { if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { V = GEP->getPointerOperand(); } else if (Operator::getOpcode(V) == Instruction::BitCast) { @@ -359,7 +358,7 @@ Value *Value::getUnderlyingObject() { return V; } assert(isa<PointerType>(V->getType()) && "Unexpected operand type!"); - } while (--MaxLookup); + } return V; } diff --git a/lib/VMCore/ValueTypes.cpp b/lib/VMCore/ValueTypes.cpp index 7f9a6cd..62b9034 100644 --- a/lib/VMCore/ValueTypes.cpp +++ b/lib/VMCore/ValueTypes.cpp @@ -36,12 +36,12 @@ EVT EVT::getExtendedVectorVT(LLVMContext &Context, EVT VT, bool EVT::isExtendedFloatingPoint() const { assert(isExtended() && "Type is not extended!"); - return LLVMTy->isFPOrFPVector(); + return LLVMTy->isFPOrFPVectorTy(); } bool EVT::isExtendedInteger() const { assert(isExtended() && "Type is not extended!"); - return LLVMTy->isIntOrIntVector(); + return 
LLVMTy->isIntOrIntVectorTy(); } bool EVT::isExtendedVector() const { diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp index 76d9d43..2b4892b 100644 --- a/lib/VMCore/Verifier.cpp +++ b/lib/VMCore/Verifier.cpp @@ -47,7 +47,6 @@ #include "llvm/IntrinsicInst.h" #include "llvm/Metadata.h" #include "llvm/Module.h" -#include "llvm/ModuleProvider.h" #include "llvm/Pass.h" #include "llvm/PassManager.h" #include "llvm/TypeSymbolTable.h" @@ -162,7 +161,8 @@ namespace { VerifierFailureAction action; // What to do if verification fails. Module *Mod; // Module we are verifying right now - DominatorTree *DT; // Dominator Tree, caution can be null! + LLVMContext *Context; // Context within which we are verifying + DominatorTree *DT; // Dominator Tree, caution can be null! std::string Messages; raw_string_ostream MessagesStr; @@ -179,24 +179,25 @@ namespace { Verifier() : FunctionPass(&ID), Broken(false), RealPass(true), action(AbortProcessAction), - DT(0), MessagesStr(Messages) {} + Mod(0), Context(0), DT(0), MessagesStr(Messages) {} explicit Verifier(VerifierFailureAction ctn) : FunctionPass(&ID), - Broken(false), RealPass(true), action(ctn), DT(0), + Broken(false), RealPass(true), action(ctn), Mod(0), Context(0), DT(0), MessagesStr(Messages) {} explicit Verifier(bool AB) : FunctionPass(&ID), Broken(false), RealPass(true), - action( AB ? AbortProcessAction : PrintMessageAction), DT(0), - MessagesStr(Messages) {} + action( AB ? AbortProcessAction : PrintMessageAction), Mod(0), + Context(0), DT(0), MessagesStr(Messages) {} explicit Verifier(DominatorTree &dt) : FunctionPass(&ID), - Broken(false), RealPass(false), action(PrintMessageAction), - DT(&dt), MessagesStr(Messages) {} + Broken(false), RealPass(false), action(PrintMessageAction), Mod(0), + Context(0), DT(&dt), MessagesStr(Messages) {} bool doInitialization(Module &M) { Mod = &M; + Context = &M.getContext(); verifyTypeSymbolTable(M.getTypeSymbolTable()); // If this is a real pass, in a pass manager, we must abort before @@ -212,6 +213,7 @@ namespace { if (RealPass) DT = &getAnalysis<DominatorTree>(); Mod = F.getParent(); + if (!Context) Context = &F.getContext(); visit(F); InstsInThisBlock.clear(); @@ -315,6 +317,7 @@ namespace { void visitStoreInst(StoreInst &SI); void visitInstruction(Instruction &I); void visitTerminatorInst(TerminatorInst &I); + void visitBranchInst(BranchInst &BI); void visitReturnInst(ReturnInst &RI); void visitSwitchInst(SwitchInst &SI); void visitSelectInst(SelectInst &SI); @@ -413,10 +416,10 @@ void Verifier::visit(Instruction &I) { void Verifier::visitGlobalValue(GlobalValue &GV) { Assert1(!GV.isDeclaration() || + GV.isMaterializable() || GV.hasExternalLinkage() || GV.hasDLLImportLinkage() || GV.hasExternalWeakLinkage() || - GV.hasGhostLinkage() || (isa<GlobalAlias>(GV) && (GV.hasLocalLinkage() || GV.hasWeakLinkage())), "Global is external, but doesn't have external or dllimport or weak linkage!", @@ -597,6 +600,9 @@ void Verifier::visitFunction(Function &F) { const FunctionType *FT = F.getFunctionType(); unsigned NumArgs = F.arg_size(); + Assert1(Context == &F.getContext(), + "Function context does not match Module context!", &F); + Assert1(!F.hasCommonLinkage(), "Functions may not have common linkage", &F); Assert2(FT->getNumParams() == NumArgs, "# formal arguments must match # of arguments for function type!", @@ -648,9 +654,11 @@ void Verifier::visitFunction(Function &F) { "Function takes metadata but isn't an intrinsic", I, &F); } - if (F.isDeclaration()) { + if (F.isMaterializable()) { + // Function has 
a body somewhere we can't see. + } else if (F.isDeclaration()) { Assert1(F.hasExternalLinkage() || F.hasDLLImportLinkage() || - F.hasExternalWeakLinkage() || F.hasGhostLinkage(), + F.hasExternalWeakLinkage(), "invalid linkage type for function declaration", &F); } else { // Verify that this function (which has a body) is not named "llvm.*". It @@ -742,6 +750,14 @@ void Verifier::visitTerminatorInst(TerminatorInst &I) { visitInstruction(I); } +void Verifier::visitBranchInst(BranchInst &BI) { + if (BI.isConditional()) { + Assert2(BI.getCondition()->getType()->isIntegerTy(1), + "Branch condition is not 'i1' type!", &BI, BI.getCondition()); + } + visitTerminatorInst(BI); +} + void Verifier::visitReturnInst(ReturnInst &RI) { Function *F = RI.getParent()->getParent(); unsigned N = RI.getNumOperands(); @@ -820,8 +836,8 @@ void Verifier::visitTruncInst(TruncInst &I) { unsigned SrcBitSize = SrcTy->getScalarSizeInBits(); unsigned DestBitSize = DestTy->getScalarSizeInBits(); - Assert1(SrcTy->isIntOrIntVector(), "Trunc only operates on integer", &I); - Assert1(DestTy->isIntOrIntVector(), "Trunc only produces integer", &I); + Assert1(SrcTy->isIntOrIntVectorTy(), "Trunc only operates on integer", &I); + Assert1(DestTy->isIntOrIntVectorTy(), "Trunc only produces integer", &I); Assert1(isa<VectorType>(SrcTy) == isa<VectorType>(DestTy), "trunc source and destination must both be a vector or neither", &I); Assert1(SrcBitSize > DestBitSize,"DestTy too big for Trunc", &I); @@ -835,8 +851,8 @@ void Verifier::visitZExtInst(ZExtInst &I) { const Type *DestTy = I.getType(); // Get the size of the types in bits, we'll need this later - Assert1(SrcTy->isIntOrIntVector(), "ZExt only operates on integer", &I); - Assert1(DestTy->isIntOrIntVector(), "ZExt only produces an integer", &I); + Assert1(SrcTy->isIntOrIntVectorTy(), "ZExt only operates on integer", &I); + Assert1(DestTy->isIntOrIntVectorTy(), "ZExt only produces an integer", &I); Assert1(isa<VectorType>(SrcTy) == isa<VectorType>(DestTy), "zext source and destination must both be a vector or neither", &I); unsigned SrcBitSize = SrcTy->getScalarSizeInBits(); @@ -856,8 +872,8 @@ void Verifier::visitSExtInst(SExtInst &I) { unsigned SrcBitSize = SrcTy->getScalarSizeInBits(); unsigned DestBitSize = DestTy->getScalarSizeInBits(); - Assert1(SrcTy->isIntOrIntVector(), "SExt only operates on integer", &I); - Assert1(DestTy->isIntOrIntVector(), "SExt only produces an integer", &I); + Assert1(SrcTy->isIntOrIntVectorTy(), "SExt only operates on integer", &I); + Assert1(DestTy->isIntOrIntVectorTy(), "SExt only produces an integer", &I); Assert1(isa<VectorType>(SrcTy) == isa<VectorType>(DestTy), "sext source and destination must both be a vector or neither", &I); Assert1(SrcBitSize < DestBitSize,"Type too small for SExt", &I); @@ -873,8 +889,8 @@ void Verifier::visitFPTruncInst(FPTruncInst &I) { unsigned SrcBitSize = SrcTy->getScalarSizeInBits(); unsigned DestBitSize = DestTy->getScalarSizeInBits(); - Assert1(SrcTy->isFPOrFPVector(),"FPTrunc only operates on FP", &I); - Assert1(DestTy->isFPOrFPVector(),"FPTrunc only produces an FP", &I); + Assert1(SrcTy->isFPOrFPVectorTy(),"FPTrunc only operates on FP", &I); + Assert1(DestTy->isFPOrFPVectorTy(),"FPTrunc only produces an FP", &I); Assert1(isa<VectorType>(SrcTy) == isa<VectorType>(DestTy), "fptrunc source and destination must both be a vector or neither",&I); Assert1(SrcBitSize > DestBitSize,"DestTy too big for FPTrunc", &I); @@ -891,8 +907,8 @@ void Verifier::visitFPExtInst(FPExtInst &I) { unsigned SrcBitSize = 
SrcTy->getScalarSizeInBits(); unsigned DestBitSize = DestTy->getScalarSizeInBits(); - Assert1(SrcTy->isFPOrFPVector(),"FPExt only operates on FP", &I); - Assert1(DestTy->isFPOrFPVector(),"FPExt only produces an FP", &I); + Assert1(SrcTy->isFPOrFPVectorTy(),"FPExt only operates on FP", &I); + Assert1(DestTy->isFPOrFPVectorTy(),"FPExt only produces an FP", &I); Assert1(isa<VectorType>(SrcTy) == isa<VectorType>(DestTy), "fpext source and destination must both be a vector or neither", &I); Assert1(SrcBitSize < DestBitSize,"DestTy too small for FPExt", &I); @@ -910,9 +926,9 @@ void Verifier::visitUIToFPInst(UIToFPInst &I) { Assert1(SrcVec == DstVec, "UIToFP source and dest must both be vector or scalar", &I); - Assert1(SrcTy->isIntOrIntVector(), + Assert1(SrcTy->isIntOrIntVectorTy(), "UIToFP source must be integer or integer vector", &I); - Assert1(DestTy->isFPOrFPVector(), + Assert1(DestTy->isFPOrFPVectorTy(), "UIToFP result must be FP or FP vector", &I); if (SrcVec && DstVec) @@ -933,9 +949,9 @@ void Verifier::visitSIToFPInst(SIToFPInst &I) { Assert1(SrcVec == DstVec, "SIToFP source and dest must both be vector or scalar", &I); - Assert1(SrcTy->isIntOrIntVector(), + Assert1(SrcTy->isIntOrIntVectorTy(), "SIToFP source must be integer or integer vector", &I); - Assert1(DestTy->isFPOrFPVector(), + Assert1(DestTy->isFPOrFPVectorTy(), "SIToFP result must be FP or FP vector", &I); if (SrcVec && DstVec) @@ -956,8 +972,9 @@ void Verifier::visitFPToUIInst(FPToUIInst &I) { Assert1(SrcVec == DstVec, "FPToUI source and dest must both be vector or scalar", &I); - Assert1(SrcTy->isFPOrFPVector(), "FPToUI source must be FP or FP vector", &I); - Assert1(DestTy->isIntOrIntVector(), + Assert1(SrcTy->isFPOrFPVectorTy(), "FPToUI source must be FP or FP vector", + &I); + Assert1(DestTy->isIntOrIntVectorTy(), "FPToUI result must be integer or integer vector", &I); if (SrcVec && DstVec) @@ -978,9 +995,9 @@ void Verifier::visitFPToSIInst(FPToSIInst &I) { Assert1(SrcVec == DstVec, "FPToSI source and dest must both be vector or scalar", &I); - Assert1(SrcTy->isFPOrFPVector(), + Assert1(SrcTy->isFPOrFPVectorTy(), "FPToSI source must be FP or FP vector", &I); - Assert1(DestTy->isIntOrIntVector(), + Assert1(DestTy->isIntOrIntVectorTy(), "FPToSI result must be integer or integer vector", &I); if (SrcVec && DstVec) @@ -997,7 +1014,7 @@ void Verifier::visitPtrToIntInst(PtrToIntInst &I) { const Type *DestTy = I.getType(); Assert1(isa<PointerType>(SrcTy), "PtrToInt source must be pointer", &I); - Assert1(DestTy->isInteger(), "PtrToInt result must be integral", &I); + Assert1(DestTy->isIntegerTy(), "PtrToInt result must be integral", &I); visitInstruction(I); } @@ -1007,7 +1024,7 @@ void Verifier::visitIntToPtrInst(IntToPtrInst &I) { const Type *SrcTy = I.getOperand(0)->getType(); const Type *DestTy = I.getType(); - Assert1(SrcTy->isInteger(), "IntToPtr source must be an integral", &I); + Assert1(SrcTy->isIntegerTy(), "IntToPtr source must be an integral", &I); Assert1(isa<PointerType>(DestTy), "IntToPtr result must be a pointer",&I); visitInstruction(I); @@ -1150,7 +1167,7 @@ void Verifier::visitBinaryOperator(BinaryOperator &B) { case Instruction::UDiv: case Instruction::SRem: case Instruction::URem: - Assert1(B.getType()->isIntOrIntVector(), + Assert1(B.getType()->isIntOrIntVectorTy(), "Integer arithmetic operators only work with integral types!", &B); Assert1(B.getType() == B.getOperand(0)->getType(), "Integer arithmetic operators must have same type " @@ -1163,7 +1180,7 @@ void Verifier::visitBinaryOperator(BinaryOperator 
&B) { case Instruction::FMul: case Instruction::FDiv: case Instruction::FRem: - Assert1(B.getType()->isFPOrFPVector(), + Assert1(B.getType()->isFPOrFPVectorTy(), "Floating-point arithmetic operators only work with " "floating-point types!", &B); Assert1(B.getType() == B.getOperand(0)->getType(), @@ -1174,7 +1191,7 @@ void Verifier::visitBinaryOperator(BinaryOperator &B) { case Instruction::And: case Instruction::Or: case Instruction::Xor: - Assert1(B.getType()->isIntOrIntVector(), + Assert1(B.getType()->isIntOrIntVectorTy(), "Logical operators only work with integral types!", &B); Assert1(B.getType() == B.getOperand(0)->getType(), "Logical operators must have same type for operands and result!", @@ -1183,7 +1200,7 @@ void Verifier::visitBinaryOperator(BinaryOperator &B) { case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: - Assert1(B.getType()->isIntOrIntVector(), + Assert1(B.getType()->isIntOrIntVectorTy(), "Shifts only work with integral types!", &B); Assert1(B.getType() == B.getOperand(0)->getType(), "Shift return type must be same as operands!", &B); @@ -1202,7 +1219,7 @@ void Verifier::visitICmpInst(ICmpInst& IC) { Assert1(Op0Ty == Op1Ty, "Both operands to ICmp instruction are not of the same type!", &IC); // Check that the operands are the right type - Assert1(Op0Ty->isIntOrIntVector() || isa<PointerType>(Op0Ty), + Assert1(Op0Ty->isIntOrIntVectorTy() || isa<PointerType>(Op0Ty), "Invalid operand types for ICmp instruction", &IC); visitInstruction(IC); @@ -1215,7 +1232,7 @@ void Verifier::visitFCmpInst(FCmpInst& FC) { Assert1(Op0Ty == Op1Ty, "Both operands to FCmp instruction are not of the same type!", &FC); // Check that the operands are the right type - Assert1(Op0Ty->isFPOrFPVector(), + Assert1(Op0Ty->isFPOrFPVectorTy(), "Invalid operand types for FCmp instruction", &FC); visitInstruction(FC); } @@ -1301,7 +1318,7 @@ void Verifier::visitAllocaInst(AllocaInst &AI) { &AI); Assert1(PTy->getElementType()->isSized(), "Cannot allocate unsized type", &AI); - Assert1(AI.getArraySize()->getType()->isInteger(32), + Assert1(AI.getArraySize()->getType()->isIntegerTy(32), "Alloca array size must be i32", &AI); visitInstruction(AI); } @@ -1480,7 +1497,7 @@ void Verifier::visitInstruction(Instruction &I) { void Verifier::VerifyType(const Type *Ty) { if (!Types.insert(Ty)) return; - Assert1(&Mod->getContext() == &Ty->getContext(), + Assert1(Context == &Ty->getContext(), "Type context does not match Module context!", Ty); switch (Ty->getTypeID()) { @@ -1733,7 +1750,7 @@ bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty, } } } else if (VT == MVT::iAny) { - if (!EltTy->isInteger()) { + if (!EltTy->isIntegerTy()) { CheckFailed(IntrinsicParam(ArgNo, NumRets) + " is not " "an integer type.", F); return false; @@ -1758,7 +1775,7 @@ bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty, break; } } else if (VT == MVT::fAny) { - if (!EltTy->isFloatingPoint()) { + if (!EltTy->isFloatingPointTy()) { CheckFailed(IntrinsicParam(ArgNo, NumRets) + " is not " "a floating-point type.", F); return false; @@ -1913,12 +1930,10 @@ bool llvm::verifyFunction(const Function &f, VerifierFailureAction action) { Function &F = const_cast<Function&>(f); assert(!F.isDeclaration() && "Cannot verify external functions"); - ExistingModuleProvider MP(F.getParent()); - FunctionPassManager FPM(&MP); + FunctionPassManager FPM(F.getParent()); Verifier *V = new Verifier(action); FPM.add(V); FPM.run(F); - MP.releaseModule(); return V->Broken; }
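End to end, the verifier shows the shape of the whole change: no ExistingModuleProvider wrapper, a FunctionPassManager built from F.getParent(), and materializable functions tolerated as declarations. Calling it stays trivial for clients (checkFunction is a hypothetical wrapper):

    #include "llvm/Analysis/Verifier.h"
    #include "llvm/Function.h"

    bool checkFunction(const llvm::Function &F) {
      // Returns true if the function is broken; PrintMessageAction reports
      // the problems instead of aborting the process.
      return llvm::verifyFunction(F, llvm::PrintMessageAction);
    }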