123 files changed, 7267 insertions, 3363 deletions
diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 205480a..7a1c049 100644
--- a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -635,7 +635,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
     --R;
     const unsigned NewSuperReg = Order[R];
     // Don't consider non-allocatable registers
-    if (!RegClassInfo.isAllocatable(NewSuperReg)) continue;
+    if (!MRI.isAllocatable(NewSuperReg)) continue;
     // Don't replace a register with itself.
     if (NewSuperReg == SuperReg) continue;
 
@@ -818,7 +818,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
         DEBUG(dbgs() << "\tAntidep reg: " << TRI->getName(AntiDepReg));
         assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
 
-        if (!RegClassInfo.isAllocatable(AntiDepReg)) {
+        if (!MRI.isAllocatable(AntiDepReg)) {
           // Don't break anti-dependencies on non-allocatable registers.
           DEBUG(dbgs() << " (non-allocatable)\n");
           continue;
diff --git a/contrib/llvm/lib/CodeGen/AllocationOrder.cpp b/contrib/llvm/lib/CodeGen/AllocationOrder.cpp
index 32ad34a..7cde136 100644
--- a/contrib/llvm/lib/CodeGen/AllocationOrder.cpp
+++ b/contrib/llvm/lib/CodeGen/AllocationOrder.cpp
@@ -29,6 +29,7 @@ AllocationOrder::AllocationOrder(unsigned VirtReg,
   const TargetRegisterClass *RC = VRM.getRegInfo().getRegClass(VirtReg);
   std::pair<unsigned, unsigned> HintPair =
     VRM.getRegInfo().getRegAllocationHint(VirtReg);
+  const MachineRegisterInfo &MRI = VRM.getRegInfo();
 
   // HintPair.second is a register, phys or virt.
   Hint = HintPair.second;
@@ -52,7 +53,7 @@ AllocationOrder::AllocationOrder(unsigned VirtReg,
     unsigned *P = new unsigned[Order.size()];
     Begin = P;
     for (unsigned i = 0; i != Order.size(); ++i)
-      if (!RCI.isReserved(Order[i]))
+      if (!MRI.isReserved(Order[i]))
         *P++ = Order[i];
     End = P;
 
@@ -69,7 +70,7 @@ AllocationOrder::AllocationOrder(unsigned VirtReg,
 
   // The hint must be a valid physreg for allocation.
   if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) ||
-               !RC->contains(Hint) || RCI.isReserved(Hint)))
+               !RC->contains(Hint) || MRI.isReserved(Hint)))
     Hint = 0;
 }
 
diff --git a/contrib/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm/lib/CodeGen/Analysis.cpp
index 447f398..5162ad7 100644
--- a/contrib/llvm/lib/CodeGen/Analysis.cpp
+++ b/contrib/llvm/lib/CodeGen/Analysis.cpp
@@ -21,7 +21,7 @@
 #include "llvm/Module.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -79,7 +79,7 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, Type *Ty,
                            uint64_t StartingOffset) {
   // Given a struct type, recursively traverse the elements.
   if (StructType *STy = dyn_cast<StructType>(Ty)) {
-    const StructLayout *SL = TLI.getTargetData()->getStructLayout(STy);
+    const StructLayout *SL = TLI.getDataLayout()->getStructLayout(STy);
     for (StructType::element_iterator EB = STy->element_begin(),
                                       EI = EB,
                                       EE = STy->element_end();
@@ -91,7 +91,7 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, Type *Ty,
   // Given an array type, recursively traverse the elements.
   if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
     Type *EltTy = ATy->getElementType();
-    uint64_t EltSize = TLI.getTargetData()->getTypeAllocSize(EltTy);
+    uint64_t EltSize = TLI.getDataLayout()->getTypeAllocSize(EltTy);
     for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
       ComputeValueVTs(TLI, EltTy, ValueVTs, Offsets,
                       StartingOffset + i * EltSize);
@@ -314,11 +314,13 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
   // the return. Ignore noalias because it doesn't affect the call sequence.
   const Function *F = ExitBB->getParent();
   Attributes CallerRetAttr = F->getAttributes().getRetAttributes();
-  if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
+  if (AttrBuilder(CalleeRetAttr).removeAttribute(Attributes::NoAlias) !=
+      AttrBuilder(CallerRetAttr).removeAttribute(Attributes::NoAlias))
     return false;
 
   // It's not safe to eliminate the sign / zero extension of the return value.
-  if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
+  if (CallerRetAttr.hasAttribute(Attributes::ZExt) ||
+      CallerRetAttr.hasAttribute(Attributes::SExt))
     return false;
 
   // Otherwise, make sure the unmodified return value of I is the return value.
@@ -354,11 +356,13 @@ bool llvm::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
   // Conservatively require the attributes of the call to match those of
   // the return. Ignore noalias because it doesn't affect the call sequence.
   Attributes CallerRetAttr = F->getAttributes().getRetAttributes();
-  if (CallerRetAttr & ~Attribute::NoAlias)
+  if (AttrBuilder(CallerRetAttr)
+      .removeAttribute(Attributes::NoAlias).hasAttributes())
     return false;
 
   // It's not safe to eliminate the sign / zero extension of the return value.
-  if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
+  if (CallerRetAttr.hasAttribute(Attributes::ZExt) ||
+      CallerRetAttr.hasAttribute(Attributes::SExt))
     return false;
 
   // Check if the only use is a function return node.
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
index bf5d8c4..b2ebf04 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -24,7 +24,7 @@
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index d9be7a1..d74a703 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -33,7 +33,7 @@
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
@@ -67,7 +67,7 @@ static gcp_map_type &getGCMap(void *&P) {
 /// getGVAlignmentLog2 - Return the alignment to use for the specified global
 /// value in log2 form.  This rounds up to the preferred alignment if possible
 /// and legal.
-static unsigned getGVAlignmentLog2(const GlobalValue *GV, const TargetData &TD,
+static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &TD,
                                    unsigned InBits = 0) {
   unsigned NumBits = 0;
   if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
@@ -131,9 +131,9 @@ const TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const {
 }
 
 
-/// getTargetData - Return information about data layout.
-const TargetData &AsmPrinter::getTargetData() const {
-  return *TM.getTargetData();
+/// getDataLayout - Return information about data layout.
+const DataLayout &AsmPrinter::getDataLayout() const {
+  return *TM.getDataLayout();
 }
 
 /// getCurrentSection() - Return the current section we are emitting to.
@@ -160,7 +160,7 @@ bool AsmPrinter::doInitialization(Module &M) {
   const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
     .Initialize(OutContext, TM);
 
-  Mang = new Mangler(OutContext, *TM.getTargetData());
+  Mang = new Mangler(OutContext, *TM.getDataLayout());
 
   // Allow the target to emit any magic that it wants at the start of the file.
   EmitStartOfAsmFile(M);
@@ -213,16 +213,16 @@ void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const {
   case GlobalValue::CommonLinkage:
   case GlobalValue::LinkOnceAnyLinkage:
   case GlobalValue::LinkOnceODRLinkage:
+  case GlobalValue::LinkOnceODRAutoHideLinkage:
   case GlobalValue::WeakAnyLinkage:
   case GlobalValue::WeakODRLinkage:
   case GlobalValue::LinkerPrivateWeakLinkage:
-  case GlobalValue::LinkerPrivateWeakDefAutoLinkage:
     if (MAI->getWeakDefDirective() != 0) {
       // .globl _foo
       OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
 
       if ((GlobalValue::LinkageTypes)Linkage !=
-          GlobalValue::LinkerPrivateWeakDefAutoLinkage)
+          GlobalValue::LinkOnceODRAutoHideLinkage)
         // .weak_definition _foo
         OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition);
       else
@@ -280,7 +280,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
 
   SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM);
 
-  const TargetData *TD = TM.getTargetData();
+  const DataLayout *TD = TM.getDataLayout();
   uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType());
 
   // If the alignment is specified, we *must* obey it.  Overaligning a global
@@ -312,8 +312,8 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
       return;
     }
 
-    if (MAI->getLCOMMDirectiveType() != LCOMM::None &&
-        (MAI->getLCOMMDirectiveType() != LCOMM::NoAlignment || Align == 1)) {
+    if (Align == 1 ||
+        MAI->getLCOMMDirectiveAlignmentType() != LCOMM::NoAlignment) {
       // .lcomm _foo, 42
       OutStreamer.EmitLocalCommonSymbol(GVSym, Size, Align);
       return;
@@ -482,9 +482,8 @@ void AsmPrinter::EmitFunctionEntryLabel() {
                      "' label emitted multiple times to assembly file");
 }
 
-
-/// EmitComments - Pretty-print comments for instructions.
-static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
+/// emitComments - Pretty-print comments for instructions.
+static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
   const MachineFunction *MF = MI.getParent()->getParent();
   const TargetMachine &TM = MF->getTarget();
 
@@ -519,16 +518,16 @@ static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
     CommentOS << " Reload Reuse\n";
 }
 
-/// EmitImplicitDef - This method emits the specified machine instruction
+/// emitImplicitDef - This method emits the specified machine instruction
 /// that is an implicit def.
-static void EmitImplicitDef(const MachineInstr *MI, AsmPrinter &AP) {
+static void emitImplicitDef(const MachineInstr *MI, AsmPrinter &AP) {
   unsigned RegNo = MI->getOperand(0).getReg();
   AP.OutStreamer.AddComment(Twine("implicit-def: ") +
                             AP.TM.getRegisterInfo()->getName(RegNo));
   AP.OutStreamer.AddBlankLine();
 }
 
-static void EmitKill(const MachineInstr *MI, AsmPrinter &AP) {
+static void emitKill(const MachineInstr *MI, AsmPrinter &AP) {
   std::string Str = "kill:";
   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
     const MachineOperand &Op = MI->getOperand(i);
@@ -541,10 +540,10 @@ static void EmitKill(const MachineInstr *MI, AsmPrinter &AP) {
   AP.OutStreamer.AddBlankLine();
 }
 
-/// EmitDebugValueComment - This method handles the target-independent form
+/// emitDebugValueComment - This method handles the target-independent form
 /// of DBG_VALUE, returning true if it was able to do so.  A false return
 /// means the target will need to handle MI in EmitInstruction.
-static bool EmitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
+static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
   // This code handles only the 3-operand target-independent form.
   if (MI->getNumOperands() != 3)
     return false;
@@ -674,7 +673,7 @@ void AsmPrinter::EmitFunctionBody() {
       }
 
       if (isVerbose())
-        EmitComments(*II, OutStreamer.GetCommentOS());
+        emitComments(*II, OutStreamer.GetCommentOS());
 
       switch (II->getOpcode()) {
       case TargetOpcode::PROLOG_LABEL:
@@ -690,15 +689,15 @@ void AsmPrinter::EmitFunctionBody() {
         break;
       case TargetOpcode::DBG_VALUE:
         if (isVerbose()) {
-          if (!EmitDebugValueComment(II, *this))
+          if (!emitDebugValueComment(II, *this))
             EmitInstruction(II);
         }
         break;
       case TargetOpcode::IMPLICIT_DEF:
-        if (isVerbose()) EmitImplicitDef(II, *this);
+        if (isVerbose()) emitImplicitDef(II, *this);
         break;
       case TargetOpcode::KILL:
-        if (isVerbose()) EmitKill(II, *this);
+        if (isVerbose()) emitKill(II, *this);
         break;
       default:
         if (!TM.hasMCUseLoc())
@@ -992,7 +991,7 @@ void AsmPrinter::EmitConstantPool() {
       Kind = SectionKind::getReadOnlyWithRelLocal();
       break;
     case 0:
-    switch (TM.getTargetData()->getTypeAllocSize(CPE.getType())) {
+    switch (TM.getDataLayout()->getTypeAllocSize(CPE.getType())) {
     case 4:  Kind = SectionKind::getMergeableConst4(); break;
     case 8:  Kind = SectionKind::getMergeableConst8(); break;
     case 16: Kind = SectionKind::getMergeableConst16();break;
@@ -1038,7 +1037,7 @@ void AsmPrinter::EmitConstantPool() {
       OutStreamer.EmitFill(NewOffset - Offset, 0/*fillval*/, 0/*addrspace*/);
 
       Type *Ty = CPE.getType();
-      Offset = NewOffset + TM.getTargetData()->getTypeAllocSize(Ty);
+      Offset = NewOffset + TM.getDataLayout()->getTypeAllocSize(Ty);
       OutStreamer.EmitLabel(GetCPISymbol(CPI));
 
       if (CPE.isMachineConstantPoolEntry())
@@ -1081,7 +1080,12 @@ void AsmPrinter::EmitJumpTableInfo() {
     JTInDiffSection = true;
   }
 
-  EmitAlignment(Log2_32(MJTI->getEntryAlignment(*TM.getTargetData())));
+  EmitAlignment(Log2_32(MJTI->getEntryAlignment(*TM.getDataLayout())));
+
+  // Jump tables in code sections are marked with a data_region directive
+  // where that's supported.
+  if (!JTInDiffSection)
+    OutStreamer.EmitDataRegion(MCDR_DataRegionJT32);
 
   for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) {
     const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
@@ -1123,6 +1127,8 @@ void AsmPrinter::EmitJumpTableInfo() {
     for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii)
       EmitJumpTableEntry(MJTI, JTBBs[ii], JTI);
   }
+  if (!JTInDiffSection)
+    OutStreamer.EmitDataRegion(MCDR_DataRegionEnd);
 }
 
 /// EmitJumpTableEntry - Emit a jump table entry for the specified MBB to the
@@ -1190,7 +1196,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
 
   assert(Value && "Unknown entry kind!");
 
-  unsigned EntrySize = MJTI->getEntrySize(*TM.getTargetData());
+  unsigned EntrySize = MJTI->getEntrySize(*TM.getDataLayout());
   OutStreamer.EmitValue(Value, EntrySize, /*addrspace*/0);
 }
 
@@ -1292,7 +1298,7 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) {
   }
 
   // Emit the function pointers in the target-specific order
-  const TargetData *TD = TM.getTargetData();
+  const DataLayout *TD = TM.getDataLayout();
   unsigned Align = Log2_32(TD->getPointerPrefAlignment());
   std::stable_sort(Structors.begin(), Structors.end(), priority_order);
   for (unsigned i = 0, e = Structors.size(); i != e; ++i) {
@@ -1408,7 +1414,7 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
 // if required for correctness.
 //
 void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV) const {
-  if (GV) NumBits = getGVAlignmentLog2(GV, *TM.getTargetData(), NumBits);
+  if (GV) NumBits = getGVAlignmentLog2(GV, *TM.getDataLayout(), NumBits);
 
   if (NumBits == 0) return;   // 1-byte aligned: no need to emit alignment.
 
@@ -1422,9 +1428,9 @@ void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV) const {
 // Constant emission.
 //===----------------------------------------------------------------------===//
 
-/// LowerConstant - Lower the specified LLVM Constant to an MCExpr.
+/// lowerConstant - Lower the specified LLVM Constant to an MCExpr.
 ///
-static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) {
+static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) {
   MCContext &Ctx = AP.OutContext;
 
   if (CV->isNullValue() || isa<UndefValue>(CV))
@@ -1447,12 +1453,12 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) {
   switch (CE->getOpcode()) {
   default:
     // If the code isn't optimized, there may be outstanding folding
-    // opportunities. Attempt to fold the expression using TargetData as a
+    // opportunities. Attempt to fold the expression using DataLayout as a
     // last resort before giving up.
     if (Constant *C =
-          ConstantFoldConstantExpression(CE, AP.TM.getTargetData()))
+          ConstantFoldConstantExpression(CE, AP.TM.getDataLayout()))
       if (C != CE)
-        return LowerConstant(C, AP);
+        return lowerConstant(C, AP);
 
     // Otherwise report the problem to the user.
     {
@@ -1464,21 +1470,20 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) {
       report_fatal_error(OS.str());
     }
   case Instruction::GetElementPtr: {
-    const TargetData &TD = *AP.TM.getTargetData();
+    const DataLayout &TD = *AP.TM.getDataLayout();
     // Generate a symbolic expression for the byte address
     const Constant *PtrVal = CE->getOperand(0);
     SmallVector<Value*, 8> IdxVec(CE->op_begin()+1, CE->op_end());
     int64_t Offset = TD.getIndexedOffset(PtrVal->getType(), IdxVec);
 
-    const MCExpr *Base = LowerConstant(CE->getOperand(0), AP);
+    const MCExpr *Base = lowerConstant(CE->getOperand(0), AP);
     if (Offset == 0)
       return Base;
 
     // Truncate/sext the offset to the pointer size.
-    if (TD.getPointerSizeInBits() != 64) {
-      int SExtAmount = 64-TD.getPointerSizeInBits();
-      Offset = (Offset << SExtAmount) >> SExtAmount;
-    }
+    unsigned Width = TD.getPointerSizeInBits();
+    if (Width < 64)
+      Offset = SignExtend64(Offset, Width);
 
     return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx),
                                    Ctx);
@@ -1491,26 +1496,26 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) {
     // is reasonable to treat their delta as a 32-bit value.
     // FALL THROUGH.
   case Instruction::BitCast:
-    return LowerConstant(CE->getOperand(0), AP);
+    return lowerConstant(CE->getOperand(0), AP);
 
   case Instruction::IntToPtr: {
-    const TargetData &TD = *AP.TM.getTargetData();
+    const DataLayout &TD = *AP.TM.getDataLayout();
     // Handle casts to pointers by changing them into casts to the appropriate
     // integer type.  This promotes constant folding and simplifies this code.
     Constant *Op = CE->getOperand(0);
     Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()),
                                       false/*ZExt*/);
-    return LowerConstant(Op, AP);
+    return lowerConstant(Op, AP);
   }
 
   case Instruction::PtrToInt: {
-    const TargetData &TD = *AP.TM.getTargetData();
+    const DataLayout &TD = *AP.TM.getDataLayout();
     // Support only foldable casts to/from pointers that can be eliminated by
     // changing the pointer to the appropriately sized integer type.
     Constant *Op = CE->getOperand(0);
     Type *Ty = CE->getType();
 
-    const MCExpr *OpExpr = LowerConstant(Op, AP);
+    const MCExpr *OpExpr = lowerConstant(Op, AP);
 
     // We can emit the pointer value into this slot if the slot is an
     // integer slot equal to the size of the pointer.
@@ -1536,8 +1541,8 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) {
   case Instruction::And:
   case Instruction::Or:
   case Instruction::Xor: {
-    const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP);
-    const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP);
+    const MCExpr *LHS = lowerConstant(CE->getOperand(0), AP);
+    const MCExpr *RHS = lowerConstant(CE->getOperand(1), AP);
     switch (CE->getOpcode()) {
     default: llvm_unreachable("Unknown binary operator constant cast expr");
     case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
@@ -1554,7 +1559,7 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) {
   }
 }
 
-static void EmitGlobalConstantImpl(const Constant *C, unsigned AddrSpace,
+static void emitGlobalConstantImpl(const Constant *C, unsigned AddrSpace,
                                    AsmPrinter &AP);
 
 /// isRepeatedByteSequence - Determine whether the given value is
@@ -1578,7 +1583,7 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) {
   if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
     if (CI->getBitWidth() > 64) return -1;
 
-    uint64_t Size = TM.getTargetData()->getTypeAllocSize(V->getType());
+    uint64_t Size = TM.getDataLayout()->getTypeAllocSize(V->getType());
     uint64_t Value = CI->getZExtValue();
 
     // Make sure the constant is at least 8 bits long and has a power
@@ -1616,13 +1621,13 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) {
   return -1;
 }
 
-static void EmitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
+static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
                                              unsigned AddrSpace,AsmPrinter &AP){
   
   // See if we can aggregate this into a .fill, if so, emit it as such.
   int Value = isRepeatedByteSequence(CDS, AP.TM);
   if (Value != -1) {
-    uint64_t Bytes = AP.TM.getTargetData()->getTypeAllocSize(CDS->getType());
+    uint64_t Bytes = AP.TM.getDataLayout()->getTypeAllocSize(CDS->getType());
     // Don't emit a 1-byte object as a .fill.
     if (Bytes > 1)
       return AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace);
@@ -1672,7 +1677,7 @@ static void EmitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
     }
   }
 
-  const TargetData &TD = *AP.TM.getTargetData();
+  const DataLayout &TD = *AP.TM.getDataLayout();
   unsigned Size = TD.getTypeAllocSize(CDS->getType());
   unsigned EmittedSize = TD.getTypeAllocSize(CDS->getType()->getElementType()) *
                         CDS->getNumElements();
@@ -1681,28 +1686,28 @@ static void EmitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
 
 }
 
-static void EmitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace,
+static void emitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace,
                                     AsmPrinter &AP) {
   // See if we can aggregate some values.  Make sure it can be
   // represented as a series of bytes of the constant value.
   int Value = isRepeatedByteSequence(CA, AP.TM);
 
   if (Value != -1) {
-    uint64_t Bytes = AP.TM.getTargetData()->getTypeAllocSize(CA->getType());
+    uint64_t Bytes = AP.TM.getDataLayout()->getTypeAllocSize(CA->getType());
     AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace);
   }
   else {
     for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i)
-      EmitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP);
+      emitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP);
   }
 }
 
-static void EmitGlobalConstantVector(const ConstantVector *CV,
+static void emitGlobalConstantVector(const ConstantVector *CV,
                                      unsigned AddrSpace, AsmPrinter &AP) {
   for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i)
-    EmitGlobalConstantImpl(CV->getOperand(i), AddrSpace, AP);
+    emitGlobalConstantImpl(CV->getOperand(i), AddrSpace, AP);
 
-  const TargetData &TD = *AP.TM.getTargetData();
+  const DataLayout &TD = *AP.TM.getDataLayout();
   unsigned Size = TD.getTypeAllocSize(CV->getType());
   unsigned EmittedSize = TD.getTypeAllocSize(CV->getType()->getElementType()) *
                          CV->getType()->getNumElements();
@@ -1710,10 +1715,10 @@ static void EmitGlobalConstantVector(const ConstantVector *CV,
     AP.OutStreamer.EmitZeros(Padding, AddrSpace);
 }
 
-static void EmitGlobalConstantStruct(const ConstantStruct *CS,
+static void emitGlobalConstantStruct(const ConstantStruct *CS,
                                      unsigned AddrSpace, AsmPrinter &AP) {
   // Print the fields in successive locations. Pad to align if needed!
-  const TargetData *TD = AP.TM.getTargetData();
+  const DataLayout *TD = AP.TM.getDataLayout();
   unsigned Size = TD->getTypeAllocSize(CS->getType());
   const StructLayout *Layout = TD->getStructLayout(CS->getType());
   uint64_t SizeSoFar = 0;
@@ -1727,7 +1732,7 @@ static void EmitGlobalConstantStruct(const ConstantStruct *CS,
     SizeSoFar += FieldSize + PadSize;
 
     // Now print the actual field value.
-    EmitGlobalConstantImpl(Field, AddrSpace, AP);
+    emitGlobalConstantImpl(Field, AddrSpace, AP);
 
     // Insert padding - this may include padding to increase the size of the
     // current field up to the ABI size (if the struct is not packed) as well
@@ -1738,7 +1743,7 @@ static void EmitGlobalConstantStruct(const ConstantStruct *CS,
          "Layout of constant struct may be incorrect!");
 }
 
-static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace,
+static void emitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace,
                                  AsmPrinter &AP) {
   if (CFP->getType()->isHalfTy()) {
     if (AP.isVerbose()) {
@@ -1793,7 +1798,7 @@ static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace,
         << DoubleVal.convertToDouble() << '\n';
     }
 
-    if (AP.TM.getTargetData()->isBigEndian()) {
+    if (AP.TM.getDataLayout()->isBigEndian()) {
       AP.OutStreamer.EmitIntValue(p[1], 2, AddrSpace);
       AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace);
     } else {
@@ -1802,7 +1807,7 @@ static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace,
     }
 
     // Emit the tail padding for the long double.
-    const TargetData &TD = *AP.TM.getTargetData();
+    const DataLayout &TD = *AP.TM.getDataLayout();
     AP.OutStreamer.EmitZeros(TD.getTypeAllocSize(CFP->getType()) -
                              TD.getTypeStoreSize(CFP->getType()), AddrSpace);
     return;
@@ -1814,7 +1819,7 @@ static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace,
   // API needed to prevent premature destruction.
   APInt API = CFP->getValueAPF().bitcastToAPInt();
   const uint64_t *p = API.getRawData();
-  if (AP.TM.getTargetData()->isBigEndian()) {
+  if (AP.TM.getDataLayout()->isBigEndian()) {
     AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace);
     AP.OutStreamer.EmitIntValue(p[1], 8, AddrSpace);
   } else {
@@ -1823,9 +1828,9 @@ static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace,
   }
 }
 
-static void EmitGlobalConstantLargeInt(const ConstantInt *CI,
+static void emitGlobalConstantLargeInt(const ConstantInt *CI,
                                        unsigned AddrSpace, AsmPrinter &AP) {
-  const TargetData *TD = AP.TM.getTargetData();
+  const DataLayout *TD = AP.TM.getDataLayout();
   unsigned BitWidth = CI->getBitWidth();
   assert((BitWidth & 63) == 0 && "only support multiples of 64-bits");
 
@@ -1839,9 +1844,9 @@ static void EmitGlobalConstantLargeInt(const ConstantInt *CI,
   }
 }
 
-static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
+static void emitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
                                    AsmPrinter &AP) {
-  const TargetData *TD = AP.TM.getTargetData();
+  const DataLayout *TD = AP.TM.getDataLayout();
   uint64_t Size = TD->getTypeAllocSize(CV->getType());
   if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV))
     return AP.OutStreamer.EmitZeros(Size, AddrSpace);
@@ -1858,13 +1863,13 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
       AP.OutStreamer.EmitIntValue(CI->getZExtValue(), Size, AddrSpace);
       return;
     default:
-      EmitGlobalConstantLargeInt(CI, AddrSpace, AP);
+      emitGlobalConstantLargeInt(CI, AddrSpace, AP);
       return;
     }
   }
 
   if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV))
-    return EmitGlobalConstantFP(CFP, AddrSpace, AP);
+    return emitGlobalConstantFP(CFP, AddrSpace, AP);
 
   if (isa<ConstantPointerNull>(CV)) {
     AP.OutStreamer.EmitIntValue(0, Size, AddrSpace);
@@ -1872,19 +1877,19 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
   }
 
   if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(CV))
-    return EmitGlobalConstantDataSequential(CDS, AddrSpace, AP);
+    return emitGlobalConstantDataSequential(CDS, AddrSpace, AP);
   
   if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV))
-    return EmitGlobalConstantArray(CVA, AddrSpace, AP);
+    return emitGlobalConstantArray(CVA, AddrSpace, AP);
 
   if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV))
-    return EmitGlobalConstantStruct(CVS, AddrSpace, AP);
+    return emitGlobalConstantStruct(CVS, AddrSpace, AP);
 
   if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
     // Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of
     // vectors).
     if (CE->getOpcode() == Instruction::BitCast)
-      return EmitGlobalConstantImpl(CE->getOperand(0), AddrSpace, AP);
+      return emitGlobalConstantImpl(CE->getOperand(0), AddrSpace, AP);
 
     if (Size > 8) {
       // If the constant expression's size is greater than 64-bits, then we have
@@ -1892,23 +1897,23 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
       // that way.
       Constant *New = ConstantFoldConstantExpression(CE, TD);
       if (New && New != CE)
-        return EmitGlobalConstantImpl(New, AddrSpace, AP);
+        return emitGlobalConstantImpl(New, AddrSpace, AP);
     }
   }
   
   if (const ConstantVector *V = dyn_cast<ConstantVector>(CV))
-    return EmitGlobalConstantVector(V, AddrSpace, AP);
+    return emitGlobalConstantVector(V, AddrSpace, AP);
     
   // Otherwise, it must be a ConstantExpr.  Lower it to an MCExpr, then emit it
   // thread the streamer with EmitValue.
-  AP.OutStreamer.EmitValue(LowerConstant(CV, AP), Size, AddrSpace);
+  AP.OutStreamer.EmitValue(lowerConstant(CV, AP), Size, AddrSpace);
 }
 
 /// EmitGlobalConstant - Print a general LLVM constant to the .s file.
 void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) {
-  uint64_t Size = TM.getTargetData()->getTypeAllocSize(CV->getType());
+  uint64_t Size = TM.getDataLayout()->getTypeAllocSize(CV->getType());
   if (Size)
-    EmitGlobalConstantImpl(CV, AddrSpace, *this);
+    emitGlobalConstantImpl(CV, AddrSpace, *this);
   else if (MAI->hasSubsectionsViaSymbols()) {
     // If the global has zero size, emit a single byte so that two labels don't
     // look like they are at the same location.
@@ -2023,8 +2028,8 @@ static void PrintChildLoopComment(raw_ostream &OS, const MachineLoop *Loop,
   }
 }
 
-/// EmitBasicBlockLoopComments - Pretty-print comments for basic blocks.
-static void EmitBasicBlockLoopComments(const MachineBasicBlock &MBB,
+/// emitBasicBlockLoopComments - Pretty-print comments for basic blocks.
+static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB,
                                        const MachineLoopInfo *LI,
                                        const AsmPrinter &AP) {
   // Add loop depth information
@@ -2090,7 +2095,7 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const {
     if (const BasicBlock *BB = MBB->getBasicBlock())
       if (BB->hasName())
         OutStreamer.AddComment("%" + BB->getName());
-    EmitBasicBlockLoopComments(*MBB, LI, *this);
+    emitBasicBlockLoopComments(*MBB, LI, *this);
   }
 
   // Print the main label for the block.
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index 90d511c..d94e1fe 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -18,7 +18,7 @@
 #include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
@@ -112,7 +112,7 @@ unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const {
   
   switch (Encoding & 0x07) {
   default: llvm_unreachable("Invalid encoded value.");
-  case dwarf::DW_EH_PE_absptr: return TM.getTargetData()->getPointerSize();
+  case dwarf::DW_EH_PE_absptr: return TM.getDataLayout()->getPointerSize();
   case dwarf::DW_EH_PE_udata2: return 2;
   case dwarf::DW_EH_PE_udata4: return 4;
   case dwarf::DW_EH_PE_udata8: return 8;
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index db43b06..50f0fc3 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -43,10 +43,10 @@ namespace {
   };
 }
 
-/// SrcMgrDiagHandler - This callback is invoked when the SourceMgr for an
+/// srcMgrDiagHandler - This callback is invoked when the SourceMgr for an
 /// inline asm has an error in it.  diagInfo is a pointer to the SrcMgrDiagInfo
 /// struct above.
-static void SrcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) {
+static void srcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) {
   SrcMgrDiagInfo *DiagInfo = static_cast<SrcMgrDiagInfo *>(diagInfo);
   assert(DiagInfo && "Diagnostic context not passed down?");
 
@@ -68,7 +68,8 @@ static void SrcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) {
 }
 
 /// EmitInlineAsm - Emit a blob of inline asm to the output streamer.
-void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode) const {
+void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode,
+                               InlineAsm::AsmDialect Dialect) const {
   assert(!Str.empty() && "Can't emit empty inline asm block");
 
   // Remember if the buffer is nul terminated or not so we can avoid a copy.
@@ -91,12 +92,12 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode) const {
   LLVMContext &LLVMCtx = MMI->getModule()->getContext();
   bool HasDiagHandler = false;
   if (LLVMCtx.getInlineAsmDiagnosticHandler() != 0) {
-    // If the source manager has an issue, we arrange for SrcMgrDiagHandler
+    // If the source manager has an issue, we arrange for srcMgrDiagHandler
     // to be invoked, getting DiagInfo passed into it.
     DiagInfo.LocInfo = LocMDNode;
     DiagInfo.DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler();
     DiagInfo.DiagContext = LLVMCtx.getInlineAsmDiagnosticContext();
-    SrcMgr.setDiagHandler(SrcMgrDiagHandler, &DiagInfo);
+    SrcMgr.setDiagHandler(srcMgrDiagHandler, &DiagInfo);
     HasDiagHandler = true;
   }
 
@@ -126,6 +127,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode) const {
   if (!TAP)
     report_fatal_error("Inline asm not supported by this streamer because"
                        " we don't have an asm parser for this target\n");
+  Parser->setAssemblerDialect(Dialect);
   Parser->setTargetParser(*TAP.get());
 
   // Don't implicitly switch to the text section before the asm.
@@ -135,71 +137,113 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode) const {
     report_fatal_error("Error parsing inline asm\n");
 }
 
+static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
+                               MachineModuleInfo *MMI, int InlineAsmVariant,
+                               AsmPrinter *AP, unsigned LocCookie,
+                               raw_ostream &OS) {
+  // Switch to the inline assembly variant.
+  OS << "\t.intel_syntax\n\t";
 
-/// EmitInlineAsm - This method formats and emits the specified machine
-/// instruction that is an inline asm.
-void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
-  assert(MI->isInlineAsm() && "printInlineAsm only works on inline asms");
-
+  const char *LastEmitted = AsmStr; // One past the last character emitted.
   unsigned NumOperands = MI->getNumOperands();
 
-  // Count the number of register definitions to find the asm string.
-  unsigned NumDefs = 0;
-  for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef();
-       ++NumDefs)
-    assert(NumDefs != NumOperands-2 && "No asm string?");
+  while (*LastEmitted) {
+    switch (*LastEmitted) {
+    default: {
+      // Not a special case, emit the string section literally.
+      const char *LiteralEnd = LastEmitted+1;
+      while (*LiteralEnd && *LiteralEnd != '{' && *LiteralEnd != '|' &&
+             *LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n')
+        ++LiteralEnd;
 
-  assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?");
+      OS.write(LastEmitted, LiteralEnd-LastEmitted);
+      LastEmitted = LiteralEnd;
+      break;
+    }
+    case '\n':
+      ++LastEmitted;   // Consume newline character.
+      OS << '\n';      // Indent code with newline.
+      break;
+    case '$': {
+      ++LastEmitted;   // Consume '$' character.
+      bool Done = true;
 
-  // Disassemble the AsmStr, printing out the literal pieces, the operands, etc.
-  const char *AsmStr = MI->getOperand(NumDefs).getSymbolName();
+      // Handle escapes.
+      switch (*LastEmitted) {
+      default: Done = false; break;
+      case '$':
+        ++LastEmitted;  // Consume second '$' character.
+        break;
+      }
+      if (Done) break;
 
-  // If this asmstr is empty, just print the #APP/#NOAPP markers.
-  // These are useful to see where empty asm's wound up.
-  if (AsmStr[0] == 0) {
-    // Don't emit the comments if writing to a .o file.
-    if (!OutStreamer.hasRawTextSupport()) return;
+      const char *IDStart = LastEmitted;
+      const char *IDEnd = IDStart;
+      while (*IDEnd >= '0' && *IDEnd <= '9') ++IDEnd;
 
-    OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
-                            MAI->getInlineAsmStart());
-    OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
-                            MAI->getInlineAsmEnd());
-    return;
-  }
+      unsigned Val;
+      if (StringRef(IDStart, IDEnd-IDStart).getAsInteger(10, Val))
+        report_fatal_error("Bad $ operand number in inline asm string: '" +
+                           Twine(AsmStr) + "'");
+      LastEmitted = IDEnd;
 
-  // Emit the #APP start marker.  This has to happen even if verbose-asm isn't
-  // enabled, so we use EmitRawText.
-  if (OutStreamer.hasRawTextSupport())
-    OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
-                            MAI->getInlineAsmStart());
+      if (Val >= NumOperands-1)
+        report_fatal_error("Invalid $ operand number in inline asm string: '" +
+                           Twine(AsmStr) + "'");
 
-  // Get the !srcloc metadata node if we have it, and decode the loc cookie from
-  // it.
-  unsigned LocCookie = 0;
-  const MDNode *LocMD = 0;
-  for (unsigned i = MI->getNumOperands(); i != 0; --i) {
-    if (MI->getOperand(i-1).isMetadata() &&
-        (LocMD = MI->getOperand(i-1).getMetadata()) &&
-        LocMD->getNumOperands() != 0) {
-      if (const ConstantInt *CI = dyn_cast<ConstantInt>(LocMD->getOperand(0))) {
-        LocCookie = CI->getZExtValue();
-        break;
-      }
-    }
-  }
+      // Okay, we finally have a value number.  Ask the target to print this
+      // operand!
+      unsigned OpNo = InlineAsm::MIOp_FirstOperand;
 
-  // Emit the inline asm to a temporary string so we can emit it through
-  // EmitInlineAsm.
-  SmallString<256> StringData;
-  raw_svector_ostream OS(StringData);
+      bool Error = false;
 
-  OS << '\t';
+      // Scan to find the machine operand number for the operand.
+      for (; Val; --Val) {
+        if (OpNo >= MI->getNumOperands()) break;
+        unsigned OpFlags = MI->getOperand(OpNo).getImm();
+        OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1;
+      }
 
-  // The variant of the current asmprinter.
-  int AsmPrinterVariant = MAI->getAssemblerDialect();
+      // We may have a location metadata attached to the end of the
+      // instruction, and at no point should see metadata at any
+      // other point while processing. It's an error if so.
+      if (OpNo >= MI->getNumOperands() ||
+          MI->getOperand(OpNo).isMetadata()) {
+        Error = true;
+      } else {
+        unsigned OpFlags = MI->getOperand(OpNo).getImm();
+        ++OpNo;  // Skip over the ID number.
+        
+        if (InlineAsm::isMemKind(OpFlags)) {
+          Error = AP->PrintAsmMemoryOperand(MI, OpNo, InlineAsmVariant,
+                                            /*Modifier*/ 0, OS);
+        } else {
+          Error = AP->PrintAsmOperand(MI, OpNo, InlineAsmVariant,
+                                      /*Modifier*/ 0, OS);
+        }
+      }
+      if (Error) {
+        std::string msg;
+        raw_string_ostream Msg(msg);
+        Msg << "invalid operand in inline asm: '" << AsmStr << "'";
+        MMI->getModule()->getContext().emitError(LocCookie, Msg.str());
+      }
+      break;
+    }
+    }
+  }
+  OS << "\n\t.att_syntax\n" << (char)0;  // null terminate string.
+}
 
+static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
+                                MachineModuleInfo *MMI, int InlineAsmVariant,
+                                int AsmPrinterVariant, AsmPrinter *AP,
+                                unsigned LocCookie, raw_ostream &OS) {
   int CurVariant = -1;            // The number of the {.|.|.} region we are in.
   const char *LastEmitted = AsmStr; // One past the last character emitted.
+  unsigned NumOperands = MI->getNumOperands();
+
+  OS << '\t';
 
   while (*LastEmitted) {
     switch (*LastEmitted) {
@@ -272,7 +316,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
                              " string: '" + Twine(AsmStr) + "'");
 
         std::string Val(StrStart, StrEnd);
-        PrintSpecial(MI, OS, Val.c_str());
+        AP->PrintSpecial(MI, OS, Val.c_str());
         LastEmitted = StrEnd+1;
         break;
       }
@@ -340,13 +384,12 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
             // FIXME: What if the operand isn't an MBB, report error?
             OS << *MI->getOperand(OpNo).getMBB()->getSymbol();
           else {
-            AsmPrinter *AP = const_cast<AsmPrinter*>(this);
             if (InlineAsm::isMemKind(OpFlags)) {
-              Error = AP->PrintAsmMemoryOperand(MI, OpNo, AsmPrinterVariant,
+              Error = AP->PrintAsmMemoryOperand(MI, OpNo, InlineAsmVariant,
                                                 Modifier[0] ? Modifier : 0,
                                                 OS);
             } else {
-              Error = AP->PrintAsmOperand(MI, OpNo, AsmPrinterVariant,
+              Error = AP->PrintAsmOperand(MI, OpNo, InlineAsmVariant,
                                           Modifier[0] ? Modifier : 0, OS);
             }
           }
@@ -363,7 +406,74 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
     }
   }
   OS << '\n' << (char)0;  // null terminate string.
-  EmitInlineAsm(OS.str(), LocMD);
+}
+
+/// EmitInlineAsm - This method formats and emits the specified machine
+/// instruction that is an inline asm.
+void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
+  assert(MI->isInlineAsm() && "printInlineAsm only works on inline asms");
+
+  // Count the number of register definitions to find the asm string.
+  unsigned NumDefs = 0;
+  for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef();
+       ++NumDefs)
+    assert(NumDefs != MI->getNumOperands()-2 && "No asm string?");
+
+  assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?");
+
+  // Disassemble the AsmStr, printing out the literal pieces, the operands, etc.
+  const char *AsmStr = MI->getOperand(NumDefs).getSymbolName();
+
+  // If this asmstr is empty, just print the #APP/#NOAPP markers.
+  // These are useful to see where empty asm's wound up.
+  if (AsmStr[0] == 0) {
+    // Don't emit the comments if writing to a .o file.
+    if (!OutStreamer.hasRawTextSupport()) return;
+
+    OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
+                            MAI->getInlineAsmStart());
+    OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
+                            MAI->getInlineAsmEnd());
+    return;
+  }
+
+  // Emit the #APP start marker.  This has to happen even if verbose-asm isn't
+  // enabled, so we use EmitRawText.
+  if (OutStreamer.hasRawTextSupport())
+    OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
+                            MAI->getInlineAsmStart());
+
+  // Get the !srcloc metadata node if we have it, and decode the loc cookie from
+  // it.
+  unsigned LocCookie = 0;
+  const MDNode *LocMD = 0;
+  for (unsigned i = MI->getNumOperands(); i != 0; --i) {
+    if (MI->getOperand(i-1).isMetadata() &&
+        (LocMD = MI->getOperand(i-1).getMetadata()) &&
+        LocMD->getNumOperands() != 0) {
+      if (const ConstantInt *CI = dyn_cast<ConstantInt>(LocMD->getOperand(0))) {
+        LocCookie = CI->getZExtValue();
+        break;
+      }
+    }
+  }
+
+  // Emit the inline asm to a temporary string so we can emit it through
+  // EmitInlineAsm.
+  SmallString<256> StringData;
+  raw_svector_ostream OS(StringData);
+
+  // The variant of the current asmprinter.
+  int AsmPrinterVariant = MAI->getAssemblerDialect();
+  InlineAsm::AsmDialect InlineAsmVariant = MI->getInlineAsmDialect();
+  AsmPrinter *AP = const_cast<AsmPrinter*>(this);
+  if (InlineAsmVariant == InlineAsm::AD_ATT)
+    EmitGCCInlineAsmStr(AsmStr, MI, MMI, InlineAsmVariant, AsmPrinterVariant,
+                        AP, LocCookie, OS);
+  else
+    EmitMSInlineAsmStr(AsmStr, MI, MMI, InlineAsmVariant, AP, LocCookie, OS);
+
+  EmitInlineAsm(OS.str(), LocMD, MI->getInlineAsmDialect());
 
   // Emit the #NOAPP end marker.  This has to happen even if verbose-asm isn't
   // enabled, so we use EmitRawText.
@@ -409,8 +519,8 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS,
 /// instruction, using the specified assembler variant.  Targets should
 /// override this to format as appropriate.
 bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-    unsigned AsmVariant, const char *ExtraCode,
-    raw_ostream &O) {
+                                 unsigned AsmVariant, const char *ExtraCode,
+                                 raw_ostream &O) {
   // Does this asm operand have a single letter operand modifier?
   if (ExtraCode && ExtraCode[0]) {
     if (ExtraCode[1] != 0) return true; // Unknown modifier.
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
index 3776848..4d73b3c 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -17,7 +17,7 @@
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -182,6 +182,12 @@ void DIEValue::dump() {
 void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const {
   unsigned Size = ~0U;
   switch (Form) {
+  case dwarf::DW_FORM_flag_present:
+    // Emit something to keep the lines and comments in sync.
+    // FIXME: Is there a better way to do this?
+    if (Asm->OutStreamer.hasRawTextSupport())
+      Asm->OutStreamer.EmitRawText(StringRef(""));
+    return;
   case dwarf::DW_FORM_flag:  // Fall thru
   case dwarf::DW_FORM_ref1:  // Fall thru
   case dwarf::DW_FORM_data1: Size = 1; break;
@@ -193,7 +199,8 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const {
   case dwarf::DW_FORM_data8: Size = 8; break;
   case dwarf::DW_FORM_udata: Asm->EmitULEB128(Integer); return;
   case dwarf::DW_FORM_sdata: Asm->EmitSLEB128(Integer); return;
-  case dwarf::DW_FORM_addr:  Size = Asm->getTargetData().getPointerSize(); break;
+  case dwarf::DW_FORM_addr:
+    Size = Asm->getDataLayout().getPointerSize(); break;
   default: llvm_unreachable("DIE Value form not supported yet");
   }
   Asm->OutStreamer.EmitIntValue(Integer, Size, 0/*addrspace*/);
@@ -203,6 +210,7 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const {
 ///
 unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const {
   switch (Form) {
+  case dwarf::DW_FORM_flag_present: return 0;
   case dwarf::DW_FORM_flag:  // Fall thru
   case dwarf::DW_FORM_ref1:  // Fall thru
   case dwarf::DW_FORM_data1: return sizeof(int8_t);
@@ -214,7 +222,7 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const {
   case dwarf::DW_FORM_data8: return sizeof(int64_t);
   case dwarf::DW_FORM_udata: return MCAsmInfo::getULEB128Size(Integer);
   case dwarf::DW_FORM_sdata: return MCAsmInfo::getSLEB128Size(Integer);
-  case dwarf::DW_FORM_addr:  return AP->getTargetData().getPointerSize();
+  case dwarf::DW_FORM_addr:  return AP->getDataLayout().getPointerSize();
   default: llvm_unreachable("DIE Value form not supported yet");
   }
 }
@@ -241,7 +249,7 @@ void DIELabel::EmitValue(AsmPrinter *AP, unsigned Form) const {
 unsigned DIELabel::SizeOf(AsmPrinter *AP, unsigned Form) const {
   if (Form == dwarf::DW_FORM_data4) return 4;
   if (Form == dwarf::DW_FORM_strp) return 4;
-  return AP->getTargetData().getPointerSize();
+  return AP->getDataLayout().getPointerSize();
 }
 
 #ifndef NDEBUG
@@ -265,7 +273,7 @@ void DIEDelta::EmitValue(AsmPrinter *AP, unsigned Form) const {
 unsigned DIEDelta::SizeOf(AsmPrinter *AP, unsigned Form) const {
   if (Form == dwarf::DW_FORM_data4) return 4;
   if (Form == dwarf::DW_FORM_strp) return 4;
-  return AP->getTargetData().getPointerSize();
+  return AP->getDataLayout().getPointerSize();
 }
 
 #ifndef NDEBUG
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h
index f93ea1b..28a96f3 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h
@@ -214,9 +214,6 @@ namespace llvm {
     ///
     virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const = 0;
 
-    // Implement isa/cast/dyncast.
-    static bool classof(const DIEValue *) { return true; }
-
 #ifndef NDEBUG
     virtual void print(raw_ostream &O) = 0;
     void dump();
@@ -257,7 +254,6 @@ namespace llvm {
     virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
 
     // Implement isa/cast/dyncast.
-    static bool classof(const DIEInteger *) { return true; }
     static bool classof(const DIEValue *I) { return I->getType() == isInteger; }
 
 #ifndef NDEBUG
@@ -286,7 +282,6 @@ namespace llvm {
     virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
 
     // Implement isa/cast/dyncast.
-    static bool classof(const DIELabel *)  { return true; }
     static bool classof(const DIEValue *L) { return L->getType() == isLabel; }
 
 #ifndef NDEBUG
@@ -313,7 +308,6 @@ namespace llvm {
     virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
 
     // Implement isa/cast/dyncast.
-    static bool classof(const DIEDelta *)  { return true; }
     static bool classof(const DIEValue *D) { return D->getType() == isDelta; }
 
 #ifndef NDEBUG
@@ -343,7 +337,6 @@ namespace llvm {
     }
 
     // Implement isa/cast/dyncast.
-    static bool classof(const DIEEntry *)  { return true; }
     static bool classof(const DIEValue *E) { return E->getType() == isEntry; }
 
 #ifndef NDEBUG
@@ -383,7 +376,6 @@ namespace llvm {
     virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
 
     // Implement isa/cast/dyncast.
-    static bool classof(const DIEBlock *)  { return true; }
     static bool classof(const DIEValue *E) { return E->getType() == isBlock; }
 
 #ifndef NDEBUG
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
index 454a923..05e0f2f 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
@@ -133,8 +133,8 @@ void DwarfAccelTable::EmitHeader(AsmPrinter *Asm) {
   }
 }
 
-// Walk through and emit the buckets for the table. This will look
-// like a list of numbers of how many elements are in each bucket.
+// Walk through and emit the buckets for the table. Each index is
+// an offset into the list of hashes.
 void DwarfAccelTable::EmitBuckets(AsmPrinter *Asm) {
   unsigned index = 0;
   for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
index 963b8cd..92d1bbe 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
@@ -237,8 +237,8 @@ private:
     #endif
   };
 
-  DwarfAccelTable(const DwarfAccelTable&); // DO NOT IMPLEMENT
-  void operator=(const DwarfAccelTable&);  // DO NOT IMPLEMENT
+  DwarfAccelTable(const DwarfAccelTable&) LLVM_DELETED_FUNCTION;
+  void operator=(const DwarfAccelTable&) LLVM_DELETED_FUNCTION;
 
   // Internal Functions
   void EmitHeader(AsmPrinter *);
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index d975f1f..4fdd5ca 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -25,7 +25,7 @@
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index d30e5bb..2b07dda 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -22,7 +22,7 @@
 #include "llvm/Instructions.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
@@ -51,6 +51,15 @@ DIEEntry *CompileUnit::createDIEEntry(DIE *Entry) {
   return Value;
 }
 
+/// addFlag - Add a flag that is true.
+void CompileUnit::addFlag(DIE *Die, unsigned Attribute) {
+  if (!DD->useDarwinGDBCompat())
+    Die->addValue(Attribute, dwarf::DW_FORM_flag_present,
+                  DIEIntegerOne);
+  else
+    addUInt(Die, Attribute, dwarf::DW_FORM_flag, 1);
+}
+
 /// addUInt - Add an unsigned integer attribute data and value.
 ///
 void CompileUnit::addUInt(DIE *Die, unsigned Attribute,
@@ -501,7 +510,7 @@ bool CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) {
   const char *FltPtr = (const char*)FltVal.getRawData();
 
   int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte.
-  bool LittleEndian = Asm->getTargetData().isLittleEndian();
+  bool LittleEndian = Asm->getDataLayout().isLittleEndian();
   int Incr = (LittleEndian ? 1 : -1);
   int Start = (LittleEndian ? 0 : NumBytes - 1);
   int Stop = (LittleEndian ? NumBytes : -1);
@@ -543,7 +552,7 @@ bool CompileUnit::addConstantValue(DIE *Die, const ConstantInt *CI,
   const uint64_t *Ptr64 = Val.getRawData();
 
   int NumBytes = Val.getBitWidth() / 8; // 8 bits per byte.
-  bool LittleEndian = Asm->getTargetData().isLittleEndian();
+  bool LittleEndian = Asm->getDataLayout().isLittleEndian();
 
   // Output the constant to DWARF one byte at a time.
   for (int i = 0; i < NumBytes; i++) {
@@ -794,7 +803,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
         (Language == dwarf::DW_LANG_C89 ||
          Language == dwarf::DW_LANG_C99 ||
          Language == dwarf::DW_LANG_ObjC))
-      addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
+      addFlag(&Buffer, dwarf::DW_AT_prototyped);
   }
     break;
   case dwarf::DW_TAG_structure_type:
@@ -825,15 +834,15 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
           addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
             dwarf::DW_ACCESS_public);
         if (SP.isExplicit())
-          addUInt(ElemDie, dwarf::DW_AT_explicit, dwarf::DW_FORM_flag, 1);
+          addFlag(ElemDie, dwarf::DW_AT_explicit);
       }
       else if (Element.isVariable()) {
         DIVariable DV(Element);
         ElemDie = new DIE(dwarf::DW_TAG_variable);
         addString(ElemDie, dwarf::DW_AT_name, DV.getName());
         addType(ElemDie, DV.getType());
-        addUInt(ElemDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
-        addUInt(ElemDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
+        addFlag(ElemDie, dwarf::DW_AT_declaration);
+        addFlag(ElemDie, dwarf::DW_AT_external);
         addSourceLine(ElemDie, DV);
       } else if (Element.isDerivedType()) {
         DIDerivedType DDTy(Element);
@@ -883,7 +892,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
     }
 
     if (CTy.isAppleBlockExtension())
-      addUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1);
+      addFlag(&Buffer, dwarf::DW_AT_APPLE_block);
 
     DICompositeType ContainingType = CTy.getContainingType();
     if (DIDescriptor(ContainingType).isCompositeType())
@@ -895,8 +904,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
     }
 
     if (CTy.isObjcClassComplete())
-      addUInt(&Buffer, dwarf::DW_AT_APPLE_objc_complete_type,
-              dwarf::DW_FORM_flag, 1);
+      addFlag(&Buffer, dwarf::DW_AT_APPLE_objc_complete_type);
 
     // Add template parameters to a class, structure or union types.
     // FIXME: The support isn't in the metadata for this yet.
@@ -929,7 +937,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
 
     // If we're a forward decl, say so.
     if (CTy.isForwardDecl())
-      addUInt(&Buffer, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+      addFlag(&Buffer, dwarf::DW_AT_declaration);
 
     // Add source line info if available.
     if (!CTy.isForwardDecl())
@@ -1028,8 +1036,10 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
   // AT_specification code in order to work around a bug in older
   // gdbs that requires the linkage name to resolve multiple template
   // functions.
+  // TODO: Remove this set of code when we get rid of the old gdb
+  // compatibility.
   StringRef LinkageName = SP.getLinkageName();
-  if (!LinkageName.empty())
+  if (!LinkageName.empty() && DD->useDarwinGDBCompat())
     addString(SPDie, dwarf::DW_AT_MIPS_linkage_name,
               getRealLinkageName(LinkageName));
 
@@ -1043,6 +1053,11 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
     return SPDie;
   }
 
+  // Add the linkage name if we have one.
+  if (!LinkageName.empty() && !DD->useDarwinGDBCompat())
+    addString(SPDie, dwarf::DW_AT_MIPS_linkage_name,
+              getRealLinkageName(LinkageName));
+
   // Constructors and operators for anonymous aggregates do not have names.
   if (!SP.getName().empty())
     addString(SPDie, dwarf::DW_AT_name, SP.getName());
@@ -1055,7 +1070,7 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
       (Language == dwarf::DW_LANG_C89 ||
        Language == dwarf::DW_LANG_C99 ||
        Language == dwarf::DW_LANG_ObjC))
-    addUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
+    addFlag(SPDie, dwarf::DW_AT_prototyped);
 
   // Add Return Type.
   DICompositeType SPTy = SP.getType();
@@ -1079,7 +1094,7 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
   }
 
   if (!SP.isDefinition()) {
-    addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+    addFlag(SPDie, dwarf::DW_AT_declaration);
     
     // Add arguments. Do not add arguments for subprogram definition. They will
     // be handled while processing variables.
@@ -1090,22 +1105,22 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
     if (SPTag == dwarf::DW_TAG_subroutine_type)
       for (unsigned i = 1, N =  Args.getNumElements(); i < N; ++i) {
         DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
-        DIType ATy = DIType(DIType(Args.getElement(i)));
+        DIType ATy = DIType(Args.getElement(i));
         addType(Arg, ATy);
         if (ATy.isArtificial())
-          addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
+          addFlag(Arg, dwarf::DW_AT_artificial);
         SPDie->addChild(Arg);
       }
   }
 
   if (SP.isArtificial())
-    addUInt(SPDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
+    addFlag(SPDie, dwarf::DW_AT_artificial);
 
   if (!SP.isLocalToUnit())
-    addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
+    addFlag(SPDie, dwarf::DW_AT_external);
 
   if (SP.isOptimized())
-    addUInt(SPDie, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1);
+    addFlag(SPDie, dwarf::DW_AT_APPLE_optimized);
 
   if (unsigned isa = Asm->getISAEncoding()) {
     addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa);
@@ -1168,7 +1183,7 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
 
   // Add scoping info.
   if (!GV.isLocalToUnit())
-    addUInt(VariableDIE, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
+    addFlag(VariableDIE, dwarf::DW_AT_external);
 
   // Add line number info.
   addSourceLine(VariableDIE, GV);
@@ -1193,8 +1208,7 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
       addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification,
                   dwarf::DW_FORM_ref4, VariableDIE);
       addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block);
-      addUInt(VariableDIE, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag,
-                     1);
+      addFlag(VariableDIE, dwarf::DW_AT_declaration);
       addDie(VariableSpecDIE);
     } else {
       addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
@@ -1213,7 +1227,7 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
     addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
     SmallVector<Value*, 3> Idx(CE->op_begin()+1, CE->op_end());
     addUInt(Block, 0, dwarf::DW_FORM_udata, 
-                   Asm->getTargetData().getIndexedOffset(Ptr->getType(), Idx));
+                   Asm->getDataLayout().getIndexedOffset(Ptr->getType(), Idx));
     addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
     addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
   }
@@ -1260,7 +1274,7 @@ void CompileUnit::constructArrayTypeDIE(DIE &Buffer,
                                         DICompositeType *CTy) {
   Buffer.setTag(dwarf::DW_TAG_array_type);
   if (CTy->getTag() == dwarf::DW_TAG_vector_type)
-    addUInt(&Buffer, dwarf::DW_AT_GNU_vector, dwarf::DW_FORM_flag, 1);
+    addFlag(&Buffer, dwarf::DW_AT_GNU_vector);
 
   // Emit derived type.
   addType(&Buffer, CTy->getTypeDerivedFrom());
@@ -1333,8 +1347,7 @@ DIE *CompileUnit::constructVariableDIE(DbgVariable *DV, bool isScopeAbstract) {
   }
 
   if (DV->isArtificial())
-    addUInt(VariableDie, dwarf::DW_AT_artificial,
-                        dwarf::DW_FORM_flag, 1);
+    addFlag(VariableDie, dwarf::DW_AT_artificial);
 
   if (isScopeAbstract) {
     DV->setDIE(VariableDie);
@@ -1446,7 +1459,7 @@ DIE *CompileUnit::createMemberDIE(DIDerivedType DT) {
     Offset -= FieldOffset;
 
     // Maybe we need to work from the other end.
-    if (Asm->getTargetData().isLittleEndian())
+    if (Asm->getDataLayout().isLittleEndian())
       Offset = FieldSize - (Offset + Size);
     addUInt(MemberDie, dwarf::DW_AT_bit_offset, 0, Offset);
 
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index b4ff9e8..fad9b6e 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -176,6 +176,9 @@ public:
   }
 public:
 
+  /// addFlag - Add a flag that is true to the DIE.
+  void addFlag(DIE *Die, unsigned Attribute);
+  
   /// addUInt - Add an unsigned integer attribute data and value.
   ///
   void addUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer);
@@ -280,8 +283,8 @@ public:
   /// for the given DITemplateTypeParameter.
   DIE *getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP);
 
-  /// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE 
-  /// for the given DITemplateValueParameter.
+  /// getOrCreateTemplateValueParameterDIE - Find existing DIE or create
+  /// new DIE for the given DITemplateValueParameter.
   DIE *getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TVP);
 
   /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 649684a..367b523 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -27,7 +27,7 @@
 #include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
@@ -54,9 +54,29 @@ static cl::opt<bool> UnknownLocations("use-unknown-locations", cl::Hidden,
      cl::desc("Make an absence of debug location information explicit."),
      cl::init(false));
 
-static cl::opt<bool> DwarfAccelTables("dwarf-accel-tables", cl::Hidden,
+namespace {
+  enum DefaultOnOff {
+    Default, Enable, Disable
+  };
+}
+
+static cl::opt<DefaultOnOff> DwarfAccelTables("dwarf-accel-tables", cl::Hidden,
      cl::desc("Output prototype dwarf accelerator tables."),
-     cl::init(false));
+     cl::values(
+                clEnumVal(Default, "Default for platform"),
+                clEnumVal(Enable, "Enabled"),
+                clEnumVal(Disable, "Disabled"),
+                clEnumValEnd),
+     cl::init(Default));
+
+static cl::opt<DefaultOnOff> DarwinGDBCompat("darwin-gdb-compat", cl::Hidden,
+     cl::desc("Compatibility with Darwin gdb."),
+     cl::values(
+                clEnumVal(Default, "Default for platform"),
+                clEnumVal(Enable, "Enabled"),
+                clEnumVal(Disable, "Disabled"),
+                clEnumValEnd),
+     cl::init(Default));
 
 namespace {
   const char *DWARFGroupName = "DWARF Emission";
@@ -135,10 +155,25 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
   DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = 0;
   FunctionBeginSym = FunctionEndSym = 0;
 
-  // Turn on accelerator tables for Darwin.
-  if (Triple(M->getTargetTriple()).isOSDarwin())
-    DwarfAccelTables = true;
-  
+  // Turn on accelerator tables and older gdb compatibility
+  // for Darwin.
+  bool isDarwin = Triple(M->getTargetTriple()).isOSDarwin();
+  if (DarwinGDBCompat == Default) {
+    if (isDarwin)
+      isDarwinGDBCompat = true;
+    else
+      isDarwinGDBCompat = false;
+  } else
+    isDarwinGDBCompat = DarwinGDBCompat == Enable ? true : false;
+
+  if (DwarfAccelTables == Default) {
+    if (isDarwin)
+      hasDwarfAccelTables = true;
+    else
+      hasDwarfAccelTables = false;
+  } else
+    hasDwarfAccelTables = DwarfAccelTables == Enable ? true : false;
+
   {
     NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
     beginModule(M);
@@ -272,44 +307,51 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU,
   assert(SPDie && "Unable to find subprogram DIE!");
   DISubprogram SP(SPNode);
 
-  DISubprogram SPDecl = SP.getFunctionDeclaration();
-  if (!SPDecl.isSubprogram()) {
-    // There is not any need to generate specification DIE for a function
-    // defined at compile unit level. If a function is defined inside another
-    // function then gdb prefers the definition at top level and but does not
-    // expect specification DIE in parent function. So avoid creating
-    // specification DIE for a function defined inside a function.
-    if (SP.isDefinition() && !SP.getContext().isCompileUnit() &&
-        !SP.getContext().isFile() &&
-        !isSubprogramContext(SP.getContext())) {
-      SPCU->addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
-      
-      // Add arguments.
-      DICompositeType SPTy = SP.getType();
-      DIArray Args = SPTy.getTypeArray();
-      unsigned SPTag = SPTy.getTag();
-      if (SPTag == dwarf::DW_TAG_subroutine_type)
-        for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
-          DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
-          DIType ATy = DIType(DIType(Args.getElement(i)));
-          SPCU->addType(Arg, ATy);
-          if (ATy.isArtificial())
-            SPCU->addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
-          SPDie->addChild(Arg);
-        }
-      DIE *SPDeclDie = SPDie;
-      SPDie = new DIE(dwarf::DW_TAG_subprogram);
-      SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4,
-                        SPDeclDie);
-      SPCU->addDie(SPDie);
-    }
-  }
-  // Pick up abstract subprogram DIE.
+  // If we're updating an abstract DIE, then we will be adding the children and
+  // object pointer later on. But what we don't want to do is process the
+  // concrete DIE twice.
   if (DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode)) {
+    // Pick up abstract subprogram DIE.
     SPDie = new DIE(dwarf::DW_TAG_subprogram);
     SPCU->addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin,
                       dwarf::DW_FORM_ref4, AbsSPDIE);
     SPCU->addDie(SPDie);
+  } else {
+    DISubprogram SPDecl = SP.getFunctionDeclaration();
+    if (!SPDecl.isSubprogram()) {
+      // There is not any need to generate specification DIE for a function
+      // defined at compile unit level. If a function is defined inside another
+      // function then gdb prefers the definition at top level and but does not
+      // expect specification DIE in parent function. So avoid creating
+      // specification DIE for a function defined inside a function.
+      if (SP.isDefinition() && !SP.getContext().isCompileUnit() &&
+          !SP.getContext().isFile() &&
+          !isSubprogramContext(SP.getContext())) {
+        SPCU->addFlag(SPDie, dwarf::DW_AT_declaration);
+
+        // Add arguments.
+        DICompositeType SPTy = SP.getType();
+        DIArray Args = SPTy.getTypeArray();
+        unsigned SPTag = SPTy.getTag();
+        if (SPTag == dwarf::DW_TAG_subroutine_type)
+          for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
+            DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
+            DIType ATy = DIType(Args.getElement(i));
+            SPCU->addType(Arg, ATy);
+            if (ATy.isArtificial())
+              SPCU->addFlag(Arg, dwarf::DW_AT_artificial);
+            if (ATy.isObjectPointer())
+              SPCU->addDIEEntry(SPDie, dwarf::DW_AT_object_pointer,
+                                dwarf::DW_FORM_ref4, Arg);
+            SPDie->addChild(Arg);
+          }
+        DIE *SPDeclDie = SPDie;
+        SPDie = new DIE(dwarf::DW_TAG_subprogram);
+        SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4,
+                          SPDeclDie);
+        SPCU->addDie(SPDie);
+      }
+    }
   }
 
   SPCU->addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
@@ -346,7 +388,7 @@ DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU,
     // DW_AT_ranges appropriately.
     TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4,
                    DebugRangeSymbols.size() 
-                   * Asm->getTargetData().getPointerSize());
+                   * Asm->getDataLayout().getPointerSize());
     for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(),
          RE = Ranges.end(); RI != RE; ++RI) {
       DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first));
@@ -386,7 +428,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
   DISubprogram InlinedSP = getDISubprogram(DS);
   DIE *OriginDIE = TheCU->getDIE(InlinedSP);
   if (!OriginDIE) {
-    DEBUG(dbgs() << "Unable to find original DIE for inlined subprogram.");
+    DEBUG(dbgs() << "Unable to find original DIE for an inlined subprogram.");
     return NULL;
   }
 
@@ -395,7 +437,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
   const MCSymbol *EndLabel = getLabelAfterInsn(RI->second);
 
   if (StartLabel == 0 || EndLabel == 0) {
-    llvm_unreachable("Unexpected Start and End labels for a inlined scope!");
+    llvm_unreachable("Unexpected Start and End labels for an inlined scope!");
   }
   assert(StartLabel->isDefined() &&
          "Invalid starting label for an inlined scope!");
@@ -412,7 +454,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
     // DW_AT_ranges appropriately.
     TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4,
                    DebugRangeSymbols.size() 
-                   * Asm->getTargetData().getPointerSize());
+                   * Asm->getDataLayout().getPointerSize());
     for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(),
          RE = Ranges.end(); RI != RE; ++RI) {
       DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first));
@@ -461,21 +503,26 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
     return NULL;
 
   SmallVector<DIE *, 8> Children;
+  DIE *ObjectPointer = NULL;
 
   // Collect arguments for current function.
   if (LScopes.isCurrentFunctionScope(Scope))
     for (unsigned i = 0, N = CurrentFnArguments.size(); i < N; ++i)
       if (DbgVariable *ArgDV = CurrentFnArguments[i])
         if (DIE *Arg = 
-            TheCU->constructVariableDIE(ArgDV, Scope->isAbstractScope()))
+            TheCU->constructVariableDIE(ArgDV, Scope->isAbstractScope())) {
           Children.push_back(Arg);
+          if (ArgDV->isObjectPointer()) ObjectPointer = Arg;
+        }
 
   // Collect lexical scope children first.
   const SmallVector<DbgVariable *, 8> &Variables = ScopeVariables.lookup(Scope);
   for (unsigned i = 0, N = Variables.size(); i < N; ++i)
     if (DIE *Variable = 
-        TheCU->constructVariableDIE(Variables[i], Scope->isAbstractScope()))
+        TheCU->constructVariableDIE(Variables[i], Scope->isAbstractScope())) {
       Children.push_back(Variable);
+      if (Variables[i]->isObjectPointer()) ObjectPointer = Variable;
+    }
   const SmallVector<LexicalScope *, 4> &Scopes = Scope->getChildren();
   for (unsigned j = 0, M = Scopes.size(); j < M; ++j)
     if (DIE *Nested = constructScopeDIE(TheCU, Scopes[j]))
@@ -509,6 +556,10 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
          E = Children.end(); I != E; ++I)
     ScopeDIE->addChild(*I);
 
+  if (DS.isSubprogram() && ObjectPointer != NULL)
+    TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer,
+                       dwarf::DW_FORM_ref4, ObjectPointer);
+
   if (DS.isSubprogram())
     TheCU->addPubTypes(DISubprogram(DS));
 
@@ -556,7 +607,8 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
   unsigned ID = GetOrCreateSourceID(FN, CompilationDir);
 
   DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
-  CompileUnit *NewCU = new CompileUnit(ID, DIUnit.getLanguage(), Die, Asm, this);
+  CompileUnit *NewCU = new CompileUnit(ID, DIUnit.getLanguage(), Die,
+                                       Asm, this);
   NewCU->addString(Die, dwarf::DW_AT_producer, DIUnit.getProducer());
   NewCU->addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
                  DIUnit.getLanguage());
@@ -575,7 +627,7 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
   if (!CompilationDir.empty())
     NewCU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
   if (DIUnit.isOptimized())
-    NewCU->addUInt(Die, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1);
+    NewCU->addFlag(Die, dwarf::DW_AT_APPLE_optimized);
 
   StringRef Flags = DIUnit.getFlags();
   if (!Flags.empty())
@@ -755,7 +807,7 @@ void DwarfDebug::endModule() {
         LexicalScope *Scope = 
           new LexicalScope(NULL, DIDescriptor(SP), NULL, false);
         DeadFnScopeMap[SP] = Scope;
-        
+
         // Construct subprogram DIE and add variables DIEs.
         CompileUnit *SPCU = CUMap.lookup(TheCU);
         assert(SPCU && "Unable to find Compile Unit!");
@@ -802,9 +854,9 @@ void DwarfDebug::endModule() {
   Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("data_end"));
 
   // End text sections.
-  for (unsigned i = 1, N = SectionMap.size(); i <= N; ++i) {
-    Asm->OutStreamer.SwitchSection(SectionMap[i]);
-    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("section_end", i));
+  for (unsigned I = 0, E = SectionMap.size(); I != E; ++I) {
+    Asm->OutStreamer.SwitchSection(SectionMap[I]);
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("section_end", I+1));
   }
 
   // Compute DIE offsets and sizes.
@@ -816,8 +868,8 @@ void DwarfDebug::endModule() {
   // Corresponding abbreviations into a abbrev section.
   emitAbbreviations();
 
-  // Emit info into a dwarf accelerator table sections.
-  if (DwarfAccelTables) {
+  // Emit info into the dwarf accelerator table sections.
+  if (useDwarfAccelTables()) {
     emitAccelNames();
     emitAccelObjC();
     emitAccelNamespaces();
@@ -825,7 +877,10 @@ void DwarfDebug::endModule() {
   }
   
   // Emit info into a debug pubtypes section.
-  emitDebugPubTypes();
+  // TODO: When we don't need the option anymore we can
+  // remove all of the code that adds to the table.
+  if (useDarwinGDBCompat())
+    emitDebugPubTypes();
 
   // Emit info into a debug loc section.
   emitDebugLoc();
@@ -840,7 +895,11 @@ void DwarfDebug::endModule() {
   emitDebugMacInfo();
 
   // Emit inline info.
-  emitDebugInlineInfo();
+  // TODO: When we don't need the option anymore we
+  // can remove all of the code that this section
+  // depends upon.
+  if (useDarwinGDBCompat())
+    emitDebugInlineInfo();
 
   // Emit info into a debug str section.
   emitDebugStr();
@@ -1014,7 +1073,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
     if (AbsVar)
       AbsVar->setMInsn(MInsn);
 
-    // Simple ranges that are fully coalesced.
+    // Simplify ranges that are fully coalesced.
     if (History.size() <= 1 || (History.size() == 2 &&
                                 MInsn->isIdenticalTo(History.back()))) {
       RegVar->setMInsn(MInsn);
@@ -1267,7 +1326,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
             // Coalesce identical entries at the end of History.
             if (History.size() >= 2 &&
                 Prev->isIdenticalTo(History[History.size() - 2])) {
-              DEBUG(dbgs() << "Coalesce identical DBG_VALUE entries:\n"
+              DEBUG(dbgs() << "Coalescing identical DBG_VALUE entries:\n"
                     << "\t" << *Prev 
                     << "\t" << *History[History.size() - 2] << "\n");
               History.pop_back();
@@ -1283,7 +1342,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
                 PrevMBB->getLastNonDebugInstr();
               if (LastMI == PrevMBB->end()) {
                 // Drop DBG_VALUE for empty range.
-                DEBUG(dbgs() << "Drop DBG_VALUE for empty range:\n"
+                DEBUG(dbgs() << "Dropping DBG_VALUE for empty range:\n"
                       << "\t" << *Prev << "\n");
                 History.pop_back();
               }
@@ -1300,9 +1359,10 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
         if (!MI->isLabel())
           AtBlockEntry = false;
 
-        // First known non DBG_VALUE location marks beginning of function
-        // body.
-        if (PrologEndLoc.isUnknown() && !MI->getDebugLoc().isUnknown())
+        // First known non-DBG_VALUE and non-frame setup location marks
+        // the beginning of the function body.
+        if (!MI->getFlag(MachineInstr::FrameSetup) &&
+            (PrologEndLoc.isUnknown() && !MI->getDebugLoc().isUnknown()))
           PrologEndLoc = MI->getDebugLoc();
 
         // Check if the instruction clobbers any registers with debug vars.
@@ -1382,7 +1442,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
                                        MF->getFunction()->getContext());
     recordSourceLine(FnStartDL.getLine(), FnStartDL.getCol(),
                      FnStartDL.getScope(MF->getFunction()->getContext()),
-                     DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0);
+                     0);
   }
 }
 
@@ -1439,8 +1499,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
   DIE *CurFnDIE = constructScopeDIE(TheCU, FnScope);
   
   if (!MF->getTarget().Options.DisableFramePointerElim(*MF))
-    TheCU->addUInt(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr,
-                   dwarf::DW_FORM_flag, 1);
+    TheCU->addFlag(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr);
 
   DebugFrames.push_back(FunctionDebugFrameInfo(Asm->getFunctionNumber(),
                                                MMI->getFrameMoves()));
@@ -1710,7 +1769,7 @@ void DwarfDebug::emitDebugInfo() {
     Asm->EmitSectionOffset(Asm->GetTempSymbol("abbrev_begin"),
                            DwarfAbbrevSectionSym);
     Asm->OutStreamer.AddComment("Address Size (in bytes)");
-    Asm->EmitInt8(Asm->getTargetData().getPointerSize());
+    Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
 
     emitDIE(Die);
     Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_end", TheCU->getID()));
@@ -1756,14 +1815,14 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) {
   Asm->EmitInt8(0);
 
   Asm->OutStreamer.AddComment("Op size");
-  Asm->EmitInt8(Asm->getTargetData().getPointerSize() + 1);
+  Asm->EmitInt8(Asm->getDataLayout().getPointerSize() + 1);
   Asm->OutStreamer.AddComment("DW_LNE_set_address");
   Asm->EmitInt8(dwarf::DW_LNE_set_address);
 
   Asm->OutStreamer.AddComment("Section end label");
 
   Asm->OutStreamer.EmitSymbolValue(Asm->GetTempSymbol("section_end",SectionEnd),
-                                   Asm->getTargetData().getPointerSize(),
+                                   Asm->getDataLayout().getPointerSize(),
                                    0/*AddrSpace*/);
 
   // Mark end of matrix.
@@ -1992,7 +2051,7 @@ void DwarfDebug::emitDebugLoc() {
   // Start the dwarf loc section.
   Asm->OutStreamer.SwitchSection(
     Asm->getObjFileLowering().getDwarfLocSection());
-  unsigned char Size = Asm->getTargetData().getPointerSize();
+  unsigned char Size = Asm->getDataLayout().getPointerSize();
   Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", 0));
   unsigned index = 1;
   for (SmallVector<DotDebugLocEntry, 4>::iterator
@@ -2089,7 +2148,7 @@ void DwarfDebug::emitDebugRanges() {
   // Start the dwarf ranges section.
   Asm->OutStreamer.SwitchSection(
     Asm->getObjFileLowering().getDwarfRangesSection());
-  unsigned char Size = Asm->getTargetData().getPointerSize();
+  unsigned char Size = Asm->getDataLayout().getPointerSize();
   for (SmallVector<const MCSymbol *, 8>::iterator
          I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end();
        I != E; ++I) {
@@ -2147,7 +2206,7 @@ void DwarfDebug::emitDebugInlineInfo() {
   Asm->OutStreamer.AddComment("Dwarf Version");
   Asm->EmitInt16(dwarf::DWARF_VERSION);
   Asm->OutStreamer.AddComment("Address Size (in bytes)");
-  Asm->EmitInt8(Asm->getTargetData().getPointerSize());
+  Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
 
   for (SmallVector<const MDNode *, 4>::iterator I = InlinedSPNodes.begin(),
          E = InlinedSPNodes.end(); I != E; ++I) {
@@ -2178,7 +2237,7 @@ void DwarfDebug::emitDebugInlineInfo() {
 
       if (Asm->isVerbose()) Asm->OutStreamer.AddComment("low_pc");
       Asm->OutStreamer.EmitSymbolValue(LI->first,
-                                       Asm->getTargetData().getPointerSize(),0);
+                                       Asm->getDataLayout().getPointerSize(),0);
     }
   }
 
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index d1d6512..61d9a51 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -21,9 +21,9 @@
 #include "llvm/MC/MachineLocation.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/UniqueVector.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/DebugLoc.h"
 
@@ -96,7 +96,8 @@ typedef struct DotDebugLocEntry {
   DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantFP *FPtr)
     : Begin(B), End(E), Variable(0), Merged(false), 
       Constant(true) { Constants.CFP = FPtr; EntryKind = E_ConstantFP; }
-  DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantInt *IPtr)
+  DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E,
+                   const ConstantInt *IPtr)
     : Begin(B), End(E), Variable(0), Merged(false), 
       Constant(true) { Constants.CIP = IPtr; EntryKind = E_ConstantInt; }
 
@@ -158,11 +159,19 @@ public:
   bool isArtificial()                const {
     if (Var.isArtificial())
       return true;
-    if (Var.getTag() == dwarf::DW_TAG_arg_variable
-        && getType().isArtificial())
+    if (getType().isArtificial())
       return true;
     return false;
   }
+
+  bool isObjectPointer()             const {
+    if (Var.isObjectPointer())
+      return true;
+    if (getType().isObjectPointer())
+      return true;
+    return false;
+  }
+  
   bool variableHasComplexAddress()   const {
     assert(Var.Verify() && "Invalid complex DbgVariable!");
     return Var.hasComplexAddress();
@@ -222,7 +231,7 @@ class DwarfDebug {
   
   /// SectionMap - Provides a unique id per text section.
   ///
-  UniqueVector<const MCSection*> SectionMap;
+  SetVector<const MCSection*> SectionMap;
 
   /// CurrentFnArguments - List of Arguments (DbgValues) for current function.
   SmallVector<DbgVariable *, 8> CurrentFnArguments;
@@ -307,6 +316,9 @@ class DwarfDebug {
   // table for the same directory as DW_at_comp_dir.
   StringRef CompilationDir;
 
+  // A holder for the DarwinGDBCompat flag so that the compile unit can use it.
+  bool isDarwinGDBCompat;
+  bool hasDwarfAccelTables;
 private:
 
   /// assignAbbrevNumber - Define a unique number for the abbreviation.
@@ -520,6 +532,11 @@ public:
   /// getStringPoolEntry - returns an entry into the string pool with the given
   /// string text.
   MCSymbol *getStringPoolEntry(StringRef Str);
+
+  /// useDarwinGDBCompat - returns whether or not to limit some of our debug
+  /// output to the limitations of darwin gdb.
+  bool useDarwinGDBCompat() { return isDarwinGDBCompat; }
+  bool useDwarfAccelTables() { return hasDwarfAccelTables; }
 };
 } // End of namespace llvm
 
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp
index 70cc2e5..08fb6b3 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -24,7 +24,7 @@
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
@@ -417,7 +417,7 @@ void DwarfException::EmitExceptionTable() {
     // that we're omitting that bit.
     TTypeEncoding = dwarf::DW_EH_PE_omit;
     // dwarf::DW_EH_PE_absptr
-    TypeFormatSize = Asm->getTargetData().getPointerSize();
+    TypeFormatSize = Asm->getDataLayout().getPointerSize();
   } else {
     // Okay, we have actual filters or typeinfos to emit.  As such, we need to
     // pick a type encoding for them.  We're about to emit a list of pointers to
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
index 75f6056..fe9e493 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -43,26 +43,6 @@ protected:
   /// MMI - Collected machine module information.
   MachineModuleInfo *MMI;
 
-  /// EmitExceptionTable - Emit landing pads and actions.
-  ///
-  /// The general organization of the table is complex, but the basic concepts
-  /// are easy.  First there is a header which describes the location and
-  /// organization of the three components that follow.
-  ///  1. The landing pad site information describes the range of code covered
-  ///     by the try.  In our case it's an accumulation of the ranges covered
-  ///     by the invokes in the try.  There is also a reference to the landing
-  ///     pad that handles the exception once processed.  Finally an index into
-  ///     the actions table.
-  ///  2. The action table, in our case, is composed of pairs of type ids
-  ///     and next action offset.  Starting with the action index from the
-  ///     landing pad site, each type Id is checked for a match to the current
-  ///     exception.  If it matches then the exception and type id are passed
-  ///     on to the landing pad.  Otherwise the next action is looked up.  This
-  ///     chain is terminated with a next action of zero.  If no type id is
-  ///     found the frame is unwound and handling continues.
-  ///  3. Type id table contains references to all the C++ typeinfo for all
-  ///     catches in the function.  This tables is reversed indexed base 1.
-
   /// SharedTypeIds - How many leading type ids two landing pads have in common.
   static unsigned SharedTypeIds(const LandingPadInfo *L,
                                 const LandingPadInfo *R);
@@ -119,6 +99,26 @@ protected:
                             const RangeMapType &PadMap,
                             const SmallVectorImpl<const LandingPadInfo *> &LPs,
                             const SmallVectorImpl<unsigned> &FirstActions);
+
+  /// EmitExceptionTable - Emit landing pads and actions.
+  ///
+  /// The general organization of the table is complex, but the basic concepts
+  /// are easy.  First there is a header which describes the location and
+  /// organization of the three components that follow.
+  ///  1. The landing pad site information describes the range of code covered
+  ///     by the try.  In our case it's an accumulation of the ranges covered
+  ///     by the invokes in the try.  There is also a reference to the landing
+  ///     pad that handles the exception once processed.  Finally an index into
+  ///     the actions table.
+  ///  2. The action table, in our case, is composed of pairs of type ids
+  ///     and next action offset.  Starting with the action index from the
+  ///     landing pad site, each type Id is checked for a match to the current
+  ///     exception.  If it matches then the exception and type id are passed
+  ///     on to the landing pad.  Otherwise the next action is looked up.  This
+  ///     chain is terminated with a next action of zero.  If no type id is
+  ///     found the frame is unwound and handling continues.
+  ///  3. Type id table contains references to all the C++ typeinfo for all
+  ///     catches in the function.  This tables is reversed indexed base 1.
   void EmitExceptionTable();
 
 public:
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index 1153817..f7c0119 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -20,7 +20,7 @@
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/ADT/SmallString.h"
@@ -91,7 +91,7 @@ void OcamlGCMetadataPrinter::beginAssembly(AsmPrinter &AP) {
 /// either condition is detected in a function which uses the GC.
 ///
 void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) {
-  unsigned IntPtrSize = AP.TM.getTargetData()->getPointerSize();
+  unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize();
 
   AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection());
   EmitCamlGlobal(getModule(), AP, "code_end");
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp
index b83aa5a..70742a8 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp
@@ -24,7 +24,7 @@
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm/lib/CodeGen/BranchFolding.cpp
index fb65bb7..6f4c5a2 100644
--- a/contrib/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/contrib/llvm/lib/CodeGen/BranchFolding.cpp
@@ -357,9 +357,8 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
   if (I1 == MBB1->begin() && I2 != MBB2->begin()) {
     --I2;
     while (I2->isDebugValue()) {
-      if (I2 == MBB2->begin()) {
+      if (I2 == MBB2->begin())
         return TailLen;
-        }
       --I2;
     }
     ++I2;
@@ -482,21 +481,19 @@ bool
 BranchFolder::MergePotentialsElt::operator<(const MergePotentialsElt &o) const {
   if (getHash() < o.getHash())
     return true;
-   else if (getHash() > o.getHash())
+  if (getHash() > o.getHash())
     return false;
-  else if (getBlock()->getNumber() < o.getBlock()->getNumber())
+  if (getBlock()->getNumber() < o.getBlock()->getNumber())
     return true;
-  else if (getBlock()->getNumber() > o.getBlock()->getNumber())
+  if (getBlock()->getNumber() > o.getBlock()->getNumber())
     return false;
-  else {
-    // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing
-    // an object with itself.
+  // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing
+  // an object with itself.
 #ifndef _GLIBCXX_DEBUG
-    llvm_unreachable("Predecessor appears twice");
+  llvm_unreachable("Predecessor appears twice");
 #else
-    return false;
+  return false;
 #endif
-  }
 }
 
 /// CountTerminators - Count the number of terminators in the given
@@ -574,7 +571,8 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1,
   // instructions that would be deleted in the merge.
   MachineFunction *MF = MBB1->getParent();
   if (EffectiveTailLen >= 2 &&
-      MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize) &&
+      MF->getFunction()->getFnAttributes().
+        hasAttribute(Attributes::OptimizeForSize) &&
       (I1 == MBB1->begin() || I2 == MBB2->begin()))
     return true;
 
@@ -1554,8 +1552,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
       for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
         Uses.insert(*AI);
     } else {
-      if (Uses.count(Reg)) {
-        Uses.erase(Reg);
+      if (Uses.erase(Reg)) {
         for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
           Uses.erase(*SubRegs); // Use sub-registers to be conservative
       }
diff --git a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp
index 939af3f..dee339a 100644
--- a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp
+++ b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp
@@ -9,7 +9,6 @@
 
 #define DEBUG_TYPE "calcspillweights"
 
-#include "llvm/Function.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/CodeGen/CalcSpillWeights.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
@@ -42,8 +41,7 @@ void CalculateSpillWeights::getAnalysisUsage(AnalysisUsage &au) const {
 bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &MF) {
 
   DEBUG(dbgs() << "********** Compute Spill Weights **********\n"
-               << "********** Function: "
-               << MF.getFunction()->getName() << '\n');
+               << "********** Function: " << MF.getName() << '\n');
 
   LiveIntervals &LIS = getAnalysis<LiveIntervals>();
   MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -166,7 +164,7 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) {
       continue;
     float hweight = Hint[hint] += weight;
     if (TargetRegisterInfo::isPhysicalRegister(hint)) {
-      if (hweight > bestPhys && LIS.isAllocatable(hint))
+      if (hweight > bestPhys && mri.isAllocatable(hint))
         bestPhys = hweight, hintPhys = hint;
     } else {
       if (hweight > bestVirt)
diff --git a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp
index 0b747fd..22b9140 100644
--- a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp
+++ b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp
@@ -18,7 +18,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetLowering.h"
 using namespace llvm;
@@ -50,7 +50,7 @@ void CCState::HandleByVal(unsigned ValNo, MVT ValVT,
   if (MinAlign > (int)Align)
     Align = MinAlign;
   MF.getFrameInfo()->ensureMaxAlignment(Align);
-  TM.getTargetLowering()->HandleByVal(this, Size);
+  TM.getTargetLowering()->HandleByVal(this, Size, Align);
   unsigned Offset = AllocateStack(Size, Align);
   addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
 }
diff --git a/contrib/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm/lib/CodeGen/CodeGen.cpp
index fb2c2e8..a53f6f8 100644
--- a/contrib/llvm/lib/CodeGen/CodeGen.cpp
+++ b/contrib/llvm/lib/CodeGen/CodeGen.cpp
@@ -41,6 +41,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeMachineCopyPropagationPass(Registry);
   initializeMachineCSEPass(Registry);
   initializeMachineDominatorTreePass(Registry);
+  initializeMachinePostDominatorTreePass(Registry);
   initializeMachineLICMPass(Registry);
   initializeMachineLoopInfoPass(Registry);
   initializeMachineModuleInfoPass(Registry);
@@ -56,6 +57,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeRegisterCoalescerPass(Registry);
   initializeSlotIndexesPass(Registry);
   initializeStackProtectorPass(Registry);
+  initializeStackColoringPass(Registry);
   initializeStackSlotColoringPass(Registry);
   initializeStrongPHIEliminationPass(Registry);
   initializeTailDuplicatePassPass(Registry);
diff --git a/contrib/llvm/lib/CodeGen/CodePlacementOpt.cpp b/contrib/llvm/lib/CodeGen/CodePlacementOpt.cpp
index 99233df..d8e06c3 100644
--- a/contrib/llvm/lib/CodeGen/CodePlacementOpt.cpp
+++ b/contrib/llvm/lib/CodeGen/CodePlacementOpt.cpp
@@ -373,7 +373,7 @@ bool CodePlacementOpt::OptimizeIntraLoopEdges(MachineFunction &MF) {
 ///
 bool CodePlacementOpt::AlignLoops(MachineFunction &MF) {
   const Function *F = MF.getFunction();
-  if (F->hasFnAttr(Attribute::OptimizeForSize))
+  if (F->getFnAttributes().hasAttribute(Attributes::OptimizeForSize))
     return false;
 
   unsigned Align = TLI->getPrefLoopAlignment();
diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
index a9de1c749..377b471 100644
--- a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -527,7 +527,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
         if (Edge->getKind() == SDep::Anti) {
           AntiDepReg = Edge->getReg();
           assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
-          if (!RegClassInfo.isAllocatable(AntiDepReg))
+          if (!MRI.isAllocatable(AntiDepReg))
             // Don't break anti-dependencies on non-allocatable registers.
             AntiDepReg = 0;
           else if (KeepRegs.test(AntiDepReg))
diff --git a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
index b4394e8..8964269 100644
--- a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -33,7 +33,6 @@ namespace {
     const MachineRegisterInfo *MRI;
     const TargetInstrInfo *TII;
     BitVector LivePhysRegs;
-    BitVector ReservedRegs;
 
   public:
     static char ID; // Pass identification, replacement for typeid
@@ -70,7 +69,7 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
       unsigned Reg = MO.getReg();
       if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
         // Don't delete live physreg defs, or any reserved register defs.
-        if (LivePhysRegs.test(Reg) || ReservedRegs.test(Reg))
+        if (LivePhysRegs.test(Reg) || MRI->isReserved(Reg))
           return false;
       } else {
         if (!MRI->use_nodbg_empty(Reg))
@@ -90,9 +89,6 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
   TRI = MF.getTarget().getRegisterInfo();
   TII = MF.getTarget().getInstrInfo();
 
-  // Treat reserved registers as always live.
-  ReservedRegs = TRI->getReservedRegs(MF);
-
   // Loop over all instructions in all blocks, from bottom to top, so that it's
   // more likely that chains of dependent but ultimately dead instructions will
   // be cleaned up.
@@ -101,7 +97,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
     MachineBasicBlock *MBB = &*I;
 
     // Start out assuming that reserved registers are live out of this block.
-    LivePhysRegs = ReservedRegs;
+    LivePhysRegs = MRI->getReservedRegs();
 
     // Also add any explicit live-out physregs for this block.
     if (!MBB->empty() && MBB->back().isReturn())
diff --git a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp
index f9347ef..d5d8404 100644
--- a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp
+++ b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp
@@ -18,7 +18,6 @@
 
 #define DEBUG_TYPE "early-ifcvt"
 #include "MachineTraceMetrics.h"
-#include "llvm/Function.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/SetVector.h"
@@ -32,9 +31,9 @@
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/MC/MCInstrItineraries.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
@@ -775,11 +774,11 @@ bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) {
 
 bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) {
   DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n"
-               << "********** Function: "
-               << ((Value*)MF.getFunction())->getName() << '\n');
+               << "********** Function: " << MF.getName() << '\n');
   TII = MF.getTarget().getInstrInfo();
   TRI = MF.getTarget().getRegisterInfo();
-  SchedModel = MF.getTarget().getInstrItineraryData()->SchedModel;
+  SchedModel =
+    MF.getTarget().getSubtarget<TargetSubtargetInfo>().getSchedModel();
   MRI = &MF.getRegInfo();
   DomTree = &getAnalysis<MachineDominatorTree>();
   Loops = getAnalysisIfAvailable<MachineLoopInfo>();
@@ -798,6 +797,5 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) {
     if (tryConvertIf(I->getBlock()))
       Changed = true;
 
-  MF.verify(this, "After early if-conversion");
   return Changed;
 }
diff --git a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp
index fee8e47..ed78f19 100644
--- a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp
+++ b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp
@@ -626,9 +626,12 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
   }
   dv->Instrs.push_back(mi);
 
-  // Finally set all defs and non-collapsed uses to dv.
-  for (unsigned i = 0, e = mi->getDesc().getNumOperands(); i != e; ++i) {
-    MachineOperand &mo = mi->getOperand(i);
+  // Finally set all defs and non-collapsed uses to dv. We must iterate through
+  // all the operators, including imp-def ones.
+  for (MachineInstr::mop_iterator ii = mi->operands_begin(),
+                                  ee = mi->operands_end();
+                                  ii != ee; ++ii) {
+    MachineOperand &mo = *ii;
     if (!mo.isReg()) continue;
     int rx = regIndex(mo.getReg());
     if (rx < 0) continue;
@@ -654,7 +657,7 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
   bool anyregs = false;
   for (TargetRegisterClass::const_iterator I = RC->begin(), E = RC->end();
        I != E; ++I)
-    if (MF->getRegInfo().isPhysRegOrOverlapUsed(*I)) {
+    if (MF->getRegInfo().isPhysRegUsed(*I)) {
       anyregs = true;
       break;
     }
diff --git a/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
index 7a17331..ffe4b63 100644
--- a/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -14,7 +14,6 @@
 
 #define DEBUG_TYPE "postrapseudos"
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/Function.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -190,8 +189,7 @@ bool ExpandPostRA::LowerCopy(MachineInstr *MI) {
 bool ExpandPostRA::runOnMachineFunction(MachineFunction &MF) {
   DEBUG(dbgs() << "Machine Function\n"
                << "********** EXPANDING POST-RA PSEUDO INSTRS **********\n"
-               << "********** Function: "
-               << MF.getFunction()->getName() << '\n');
+               << "********** Function: " << MF.getName() << '\n');
   TRI = MF.getTarget().getRegisterInfo();
   TII = MF.getTarget().getInstrInfo();
 
diff --git a/contrib/llvm/lib/CodeGen/GCStrategy.cpp b/contrib/llvm/lib/CodeGen/GCStrategy.cpp
index 506b5cf..f4755bb 100644
--- a/contrib/llvm/lib/CodeGen/GCStrategy.cpp
+++ b/contrib/llvm/lib/CodeGen/GCStrategy.cpp
@@ -20,6 +20,7 @@
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Module.h"
 #include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/DominatorInternals.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -387,9 +388,16 @@ void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) {
   const TargetFrameLowering *TFI = TM->getFrameLowering();
   assert(TFI && "TargetRegisterInfo not available!");
 
-  for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(),
-                                      RE = FI->roots_end(); RI != RE; ++RI)
-    RI->StackOffset = TFI->getFrameIndexOffset(MF, RI->Num);
+  for (GCFunctionInfo::roots_iterator RI = FI->roots_begin();
+       RI != FI->roots_end();) {
+    // If the root references a dead object, no need to keep it.
+    if (MF.getFrameInfo()->isDeadObjectIndex(RI->Num)) {
+      RI = FI->removeStackRoot(RI);
+    } else {
+      RI->StackOffset = TFI->getFrameIndexOffset(MF, RI->Num);
+      ++RI;
+    }
+  }
 }
 
 bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) {
diff --git a/contrib/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm/lib/CodeGen/IfConversion.cpp
index 4214ba1..31e36f0 100644
--- a/contrib/llvm/lib/CodeGen/IfConversion.cpp
+++ b/contrib/llvm/lib/CodeGen/IfConversion.cpp
@@ -13,7 +13,6 @@
 
 #define DEBUG_TYPE "ifcvt"
 #include "BranchFolding.h"
-#include "llvm/Function.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
@@ -282,7 +281,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
   }
 
   DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum <<  ") \'"
-               << MF.getFunction()->getName() << "\'");
+               << MF.getName() << "\'");
 
   if (FnNum < IfCvtFnStart || (IfCvtFnStop != -1 && FnNum > IfCvtFnStop)) {
     DEBUG(dbgs() << " skipped\n");
@@ -997,14 +996,13 @@ static void UpdatePredRedefs(MachineInstr *MI, SmallSet<unsigned,4> &Redefs,
   }
   for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
     unsigned Reg = Defs[i];
-    if (Redefs.count(Reg)) {
+    if (!Redefs.insert(Reg)) {
       if (AddImpUse)
         // Treat predicated update as read + write.
         MI->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/,
                                               true/*IsImp*/,false/*IsKill*/,
                                               false/*IsDead*/,true/*IsUndef*/));
     } else {
-      Redefs.insert(Reg);
       for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
         Redefs.insert(*SubRegs);
     }
diff --git a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
index 07e37af..37828a7 100644
--- a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -613,7 +613,7 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI,
     propagateSiblingValue(SVI);
   } while (!WorkList.empty());
 
-  // Look up the value we were looking for.  We already did this lokup at the
+  // Look up the value we were looking for.  We already did this lookup at the
   // top of the function, but SibValues may have been invalidated.
   SVI = SibValues.find(UseVNI);
   assert(SVI != SibValues.end() && "Didn't compute requested info");
@@ -863,7 +863,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg,
   // If the instruction also writes VirtReg.reg, it had better not require the
   // same register for uses and defs.
   SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops;
-  MIBundleOperands::RegInfo RI =
+  MIBundleOperands::VirtRegInfo RI =
     MIBundleOperands(MI).analyzeVirtReg(VirtReg.reg, &Ops);
   if (RI.Tied) {
     markValueUsed(&VirtReg, ParentVNI);
@@ -1142,7 +1142,7 @@ void InlineSpiller::spillAroundUses(unsigned Reg) {
 
     // Analyze instruction.
     SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops;
-    MIBundleOperands::RegInfo RI =
+    MIBundleOperands::VirtRegInfo RI =
       MIBundleOperands(MI).analyzeVirtReg(Reg, &Ops);
 
     // Find the slot index where this instruction reads and writes OldLI.
diff --git a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp
index 8d2282a..6120ae56 100644
--- a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp
@@ -21,7 +21,7 @@
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
 using namespace llvm;
 
 template <class ArgIt>
@@ -457,7 +457,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
     break;   // Strip out annotate intrinsic
     
   case Intrinsic::memcpy: {
-    IntegerType *IntPtr = TD.getIntPtrType(Context);
+    Type *IntPtr = TD.getIntPtrType(Context);
     Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
                                         /* isSigned */ false);
     Value *Ops[3];
@@ -468,7 +468,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
     break;
   }
   case Intrinsic::memmove: {
-    IntegerType *IntPtr = TD.getIntPtrType(Context);
+    Type *IntPtr = TD.getIntPtrType(Context);
     Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
                                         /* isSigned */ false);
     Value *Ops[3];
@@ -479,7 +479,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
     break;
   }
   case Intrinsic::memset: {
-    IntegerType *IntPtr = TD.getIntPtrType(Context);
+    Type *IntPtr = TD.getIntPtrType(Context);
     Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
                                         /* isSigned */ false);
     Value *Ops[3];
diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
index d631726..defc127 100644
--- a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
@@ -687,8 +687,7 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf) {
   clear();
   LS.initialize(mf);
   DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: "
-               << ((Value*)mf.getFunction())->getName()
-               << " **********\n");
+               << mf.getName() << " **********\n");
 
   bool Changed = collectDebugValues(mf);
   computeIntervals();
diff --git a/contrib/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm/lib/CodeGen/LiveInterval.cpp
index 0a795e6..8585cbb 100644
--- a/contrib/llvm/lib/CodeGen/LiveInterval.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveInterval.cpp
@@ -27,6 +27,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetRegisterInfo.h"
+#include "RegisterCoalescer.h"
 #include <algorithm>
 using namespace llvm;
 
@@ -58,8 +59,16 @@ VNInfo *LiveInterval::createDeadDef(SlotIndex Def,
     return VNI;
   }
   if (SlotIndex::isSameInstr(Def, I->start)) {
-    assert(I->start == Def && "Cannot insert def, already live");
-    assert(I->valno->def == Def && "Inconsistent existing value def");
+    assert(I->valno->def == I->start && "Inconsistent existing value def");
+
+    // It is possible to have both normal and early-clobber defs of the same
+    // register on an instruction. It doesn't make a lot of sense, but it is
+    // possible to specify in inline assembly.
+    //
+    // Just convert everything to early-clobber.
+    Def = std::min(Def, I->start);
+    if (Def != I->start)
+      I->start = I->valno->def = Def;
     return I->valno;
   }
   assert(SlotIndex::isEarlierInstr(Def, I->start) && "Already live at def");
@@ -68,21 +77,6 @@ VNInfo *LiveInterval::createDeadDef(SlotIndex Def,
   return VNI;
 }
 
-/// killedInRange - Return true if the interval has kills in [Start,End).
-bool LiveInterval::killedInRange(SlotIndex Start, SlotIndex End) const {
-  Ranges::const_iterator r =
-    std::lower_bound(ranges.begin(), ranges.end(), End);
-
-  // Now r points to the first interval with start >= End, or ranges.end().
-  if (r == ranges.begin())
-    return false;
-
-  --r;
-  // Now r points to the last interval with end <= End.
-  // r->end is the kill point.
-  return r->end >= Start && r->end < End;
-}
-
 // overlaps - Return true if the intersection of the two live intervals is
 // not empty.
 //
@@ -142,6 +136,48 @@ bool LiveInterval::overlapsFrom(const LiveInterval& other,
   return false;
 }
 
+bool LiveInterval::overlaps(const LiveInterval &Other,
+                            const CoalescerPair &CP,
+                            const SlotIndexes &Indexes) const {
+  assert(!empty() && "empty interval");
+  if (Other.empty())
+    return false;
+
+  // Use binary searches to find initial positions.
+  const_iterator I = find(Other.beginIndex());
+  const_iterator IE = end();
+  if (I == IE)
+    return false;
+  const_iterator J = Other.find(I->start);
+  const_iterator JE = Other.end();
+  if (J == JE)
+    return false;
+
+  for (;;) {
+    // J has just been advanced to satisfy:
+    assert(J->end >= I->start);
+    // Check for an overlap.
+    if (J->start < I->end) {
+      // I and J are overlapping. Find the later start.
+      SlotIndex Def = std::max(I->start, J->start);
+      // Allow the overlap if Def is a coalescable copy.
+      if (Def.isBlock() ||
+          !CP.isCoalescable(Indexes.getInstructionFromIndex(Def)))
+        return true;
+    }
+    // Advance the iterator that ends first to check for more overlaps.
+    if (J->end > I->end) {
+      std::swap(I, J);
+      std::swap(IE, JE);
+    }
+    // Advance J until J->end >= I->start.
+    do
+      if (++J == JE)
+        return false;
+    while (J->end < I->start);
+  }
+}
+
 /// overlaps - Return true if the live interval overlaps a range specified
 /// by [Start, End).
 bool LiveInterval::overlaps(SlotIndex Start, SlotIndex End) const {
@@ -399,7 +435,7 @@ void LiveInterval::join(LiveInterval &Other,
 
   // If we have to apply a mapping to our base interval assignment, rewrite it
   // now.
-  if (MustMapCurValNos) {
+  if (MustMapCurValNos && !empty()) {
     // Map the first live range.
 
     iterator OutIt = begin();
@@ -673,27 +709,6 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) {
   return V2;
 }
 
-void LiveInterval::Copy(const LiveInterval &RHS,
-                        MachineRegisterInfo *MRI,
-                        VNInfo::Allocator &VNInfoAllocator) {
-  ranges.clear();
-  valnos.clear();
-  std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(RHS.reg);
-  MRI->setRegAllocationHint(reg, Hint.first, Hint.second);
-
-  weight = RHS.weight;
-  for (unsigned i = 0, e = RHS.getNumValNums(); i != e; ++i) {
-    const VNInfo *VNI = RHS.getValNumInfo(i);
-    createValueCopy(VNI, VNInfoAllocator);
-  }
-  for (unsigned i = 0, e = RHS.ranges.size(); i != e; ++i) {
-    const LiveRange &LR = RHS.ranges[i];
-    addRange(LiveRange(LR.start, LR.end, getValNumInfo(LR.valno->id)));
-  }
-
-  verify();
-}
-
 unsigned LiveInterval::getSize() const {
   unsigned Sum = 0;
   for (const_iterator I = begin(), E = end(); I != E; ++I)
@@ -705,9 +720,11 @@ raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange &LR) {
   return os << '[' << LR.start << ',' << LR.end << ':' << LR.valno->id << ")";
 }
 
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void LiveRange::dump() const {
   dbgs() << *this << "\n";
 }
+#endif
 
 void LiveInterval::print(raw_ostream &OS) const {
   if (empty())
@@ -740,9 +757,11 @@ void LiveInterval::print(raw_ostream &OS) const {
   }
 }
 
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void LiveInterval::dump() const {
   dbgs() << *this << "\n";
 }
+#endif
 
 #ifndef NDEBUG
 void LiveInterval::verify() const {
diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
index d0f8ae1..4e75d89 100644
--- a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -34,6 +34,7 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/STLExtras.h"
 #include "LiveRangeCalc.h"
+#include "VirtRegMap.h"
 #include <algorithm>
 #include <limits>
 #include <cmath>
@@ -109,8 +110,6 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
   DomTree = &getAnalysis<MachineDominatorTree>();
   if (!LRCalc)
     LRCalc = new LiveRangeCalc();
-  AllocatableRegs = TRI->getAllocatableSet(fn);
-  ReservedRegs = TRI->getReservedRegs(fn);
 
   // Allocate space for all virtual registers.
   VirtRegIntervals.resize(MRI->getNumVirtRegs());
@@ -147,6 +146,11 @@ void LiveIntervals::print(raw_ostream &OS, const Module* ) const {
       OS << PrintReg(Reg) << " = " << getInterval(Reg) << '\n';
   }
 
+  OS << "RegMasks:";
+  for (unsigned i = 0, e = RegMaskSlots.size(); i != e; ++i)
+    OS << ' ' << RegMaskSlots[i];
+  OS << '\n';
+
   printInstrs(OS);
 }
 
@@ -155,9 +159,11 @@ void LiveIntervals::printInstrs(raw_ostream &OS) const {
   MF->print(OS, Indexes);
 }
 
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void LiveIntervals::dumpInstrs() const {
   printInstrs(dbgs());
 }
+#endif
 
 static
 bool MultipleDefsBySameMI(const MachineInstr &MI, unsigned MOIdx) {
@@ -382,8 +388,7 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB,
 /// which a variable is live
 void LiveIntervals::computeIntervals() {
   DEBUG(dbgs() << "********** COMPUTING LIVE INTERVALS **********\n"
-               << "********** Function: "
-               << ((Value*)MF->getFunction())->getName() << '\n');
+               << "********** Function: " << MF->getName() << '\n');
 
   RegMaskBlocks.resize(MF->getNumBlockIDs());
 
@@ -440,7 +445,7 @@ void LiveIntervals::computeIntervals() {
 
     // Compute the number of register mask instructions in this block.
     std::pair<unsigned, unsigned> &RMB = RegMaskBlocks[MBB->getNumber()];
-    RMB.second = RegMaskSlots.size() - RMB.first;;
+    RMB.second = RegMaskSlots.size() - RMB.first;
   }
 
   // Create empty intervals for registers defined by implicit_def's (except
@@ -497,7 +502,7 @@ void LiveIntervals::computeRegMasks() {
           RegMaskBits.push_back(MO->getRegMask());
       }
     // Compute the number of register mask instructions in this block.
-    RMB.second = RegMaskSlots.size() - RMB.first;;
+    RMB.second = RegMaskSlots.size() - RMB.first;
   }
 }
 
@@ -540,11 +545,11 @@ void LiveIntervals::computeRegUnitInterval(LiveInterval *LI) {
   // Ignore uses of reserved registers. We only track defs of those.
   for (MCRegUnitRootIterator Roots(Unit, TRI); Roots.isValid(); ++Roots) {
     unsigned Root = *Roots;
-    if (!isReserved(Root) && !MRI->reg_empty(Root))
+    if (!MRI->isReserved(Root) && !MRI->reg_empty(Root))
       LRCalc->extendToUses(LI, Root);
     for (MCSuperRegIterator Supers(Root, TRI); Supers.isValid(); ++Supers) {
       unsigned Reg = *Supers;
-      if (!isReserved(Reg) && !MRI->reg_empty(Reg))
+      if (!MRI->isReserved(Reg) && !MRI->reg_empty(Reg))
         LRCalc->extendToUses(LI, Reg);
     }
   }
@@ -729,17 +734,100 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
   return CanSeparate;
 }
 
+void LiveIntervals::extendToIndices(LiveInterval *LI,
+                                    ArrayRef<SlotIndex> Indices) {
+  assert(LRCalc && "LRCalc not initialized.");
+  LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
+  for (unsigned i = 0, e = Indices.size(); i != e; ++i)
+    LRCalc->extend(LI, Indices[i]);
+}
+
+void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill,
+                               SmallVectorImpl<SlotIndex> *EndPoints) {
+  LiveRangeQuery LRQ(*LI, Kill);
+  VNInfo *VNI = LRQ.valueOut();
+  if (!VNI)
+    return;
+
+  MachineBasicBlock *KillMBB = Indexes->getMBBFromIndex(Kill);
+  SlotIndex MBBStart, MBBEnd;
+  tie(MBBStart, MBBEnd) = Indexes->getMBBRange(KillMBB);
+
+  // If VNI isn't live out from KillMBB, the value is trivially pruned.
+  if (LRQ.endPoint() < MBBEnd) {
+    LI->removeRange(Kill, LRQ.endPoint());
+    if (EndPoints) EndPoints->push_back(LRQ.endPoint());
+    return;
+  }
+
+  // VNI is live out of KillMBB.
+  LI->removeRange(Kill, MBBEnd);
+  if (EndPoints) EndPoints->push_back(MBBEnd);
+
+  // Find all blocks that are reachable from KillMBB without leaving VNI's live
+  // range. It is possible that KillMBB itself is reachable, so start a DFS
+  // from each successor.
+  typedef SmallPtrSet<MachineBasicBlock*, 9> VisitedTy;
+  VisitedTy Visited;
+  for (MachineBasicBlock::succ_iterator
+       SuccI = KillMBB->succ_begin(), SuccE = KillMBB->succ_end();
+       SuccI != SuccE; ++SuccI) {
+    for (df_ext_iterator<MachineBasicBlock*, VisitedTy>
+         I = df_ext_begin(*SuccI, Visited), E = df_ext_end(*SuccI, Visited);
+         I != E;) {
+      MachineBasicBlock *MBB = *I;
+
+      // Check if VNI is live in to MBB.
+      tie(MBBStart, MBBEnd) = Indexes->getMBBRange(MBB);
+      LiveRangeQuery LRQ(*LI, MBBStart);
+      if (LRQ.valueIn() != VNI) {
+        // This block isn't part of the VNI live range. Prune the search.
+        I.skipChildren();
+        continue;
+      }
+
+      // Prune the search if VNI is killed in MBB.
+      if (LRQ.endPoint() < MBBEnd) {
+        LI->removeRange(MBBStart, LRQ.endPoint());
+        if (EndPoints) EndPoints->push_back(LRQ.endPoint());
+        I.skipChildren();
+        continue;
+      }
+
+      // VNI is live through MBB.
+      LI->removeRange(MBBStart, MBBEnd);
+      if (EndPoints) EndPoints->push_back(MBBEnd);
+      ++I;
+    }
+  }
+}
 
 //===----------------------------------------------------------------------===//
 // Register allocator hooks.
 //
 
-void LiveIntervals::addKillFlags() {
+void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
+  // Keep track of regunit ranges.
+  SmallVector<std::pair<LiveInterval*, LiveInterval::iterator>, 8> RU;
+
   for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
     unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
     if (MRI->reg_nodbg_empty(Reg))
       continue;
     LiveInterval *LI = &getInterval(Reg);
+    if (LI->empty())
+      continue;
+
+    // Find the regunit intervals for the assigned register. They may overlap
+    // the virtual register live range, cancelling any kills.
+    RU.clear();
+    for (MCRegUnitIterator Units(VRM->getPhys(Reg), TRI); Units.isValid();
+         ++Units) {
+      LiveInterval *RUInt = &getRegUnit(*Units);
+      if (RUInt->empty())
+        continue;
+      RU.push_back(std::make_pair(RUInt, RUInt->find(LI->begin()->end)));
+    }
 
     // Every instruction that kills Reg corresponds to a live range end point.
     for (LiveInterval::iterator RI = LI->begin(), RE = LI->end(); RI != RE;
@@ -750,7 +838,32 @@ void LiveIntervals::addKillFlags() {
       MachineInstr *MI = getInstructionFromIndex(RI->end);
       if (!MI)
         continue;
-      MI->addRegisterKilled(Reg, NULL);
+
+      // Check if any of the reguints are live beyond the end of RI. That could
+      // happen when a physreg is defined as a copy of a virtreg:
+      //
+      //   %EAX = COPY %vreg5
+      //   FOO %vreg5         <--- MI, cancel kill because %EAX is live.
+      //   BAR %EAX<kill>
+      //
+      // There should be no kill flag on FOO when %vreg5 is rewritten as %EAX.
+      bool CancelKill = false;
+      for (unsigned u = 0, e = RU.size(); u != e; ++u) {
+        LiveInterval *RInt = RU[u].first;
+        LiveInterval::iterator &I = RU[u].second;
+        if (I == RInt->end())
+          continue;
+        I = RInt->advanceTo(I, RI->end);
+        if (I == RInt->end() || I->start >= RI->end)
+          continue;
+        // I is overlapping RI.
+        CancelKill = true;
+        break;
+      }
+      if (CancelKill)
+        MI->clearRegisterKills(Reg, NULL);
+      else
+        MI->addRegisterKilled(Reg, NULL);
     }
   }
 }
@@ -900,497 +1013,321 @@ private:
   LiveIntervals& LIS;
   const MachineRegisterInfo& MRI;
   const TargetRegisterInfo& TRI;
+  SlotIndex OldIdx;
   SlotIndex NewIdx;
-
-  typedef std::pair<LiveInterval*, LiveRange*> IntRangePair;
-  typedef DenseSet<IntRangePair> RangeSet;
-
-  struct RegRanges {
-    LiveRange* Use;
-    LiveRange* EC;
-    LiveRange* Dead;
-    LiveRange* Def;
-    RegRanges() : Use(0), EC(0), Dead(0), Def(0) {}
-  };
-  typedef DenseMap<unsigned, RegRanges> BundleRanges;
+  SmallPtrSet<LiveInterval*, 8> Updated;
+  bool UpdateFlags;
 
 public:
   HMEditor(LiveIntervals& LIS, const MachineRegisterInfo& MRI,
-           const TargetRegisterInfo& TRI, SlotIndex NewIdx)
-    : LIS(LIS), MRI(MRI), TRI(TRI), NewIdx(NewIdx) {}
-
-  // Update intervals for all operands of MI from OldIdx to NewIdx.
-  // This assumes that MI used to be at OldIdx, and now resides at
-  // NewIdx.
-  void moveAllRangesFrom(MachineInstr* MI, SlotIndex OldIdx) {
-    assert(NewIdx != OldIdx && "No-op move? That's a bit strange.");
-
-    // Collect the operands.
-    RangeSet Entering, Internal, Exiting;
-    bool hasRegMaskOp = false;
-    collectRanges(MI, Entering, Internal, Exiting, hasRegMaskOp, OldIdx);
-
-    // To keep the LiveRanges valid within an interval, move the ranges closest
-    // to the destination first. This prevents ranges from overlapping, to that
-    // APIs like removeRange still work.
-    if (NewIdx < OldIdx) {
-      moveAllEnteringFrom(OldIdx, Entering);
-      moveAllInternalFrom(OldIdx, Internal);
-      moveAllExitingFrom(OldIdx, Exiting);
-    }
-    else {
-      moveAllExitingFrom(OldIdx, Exiting);
-      moveAllInternalFrom(OldIdx, Internal);
-      moveAllEnteringFrom(OldIdx, Entering);
-    }
-
-    if (hasRegMaskOp)
-      updateRegMaskSlots(OldIdx);
-
-#ifndef NDEBUG
-    LIValidator validator;
-    validator = std::for_each(Entering.begin(), Entering.end(), validator);
-    validator = std::for_each(Internal.begin(), Internal.end(), validator);
-    validator = std::for_each(Exiting.begin(), Exiting.end(), validator);
-    assert(validator.rangesOk() && "moveAllOperandsFrom broke liveness.");
-#endif
-
+           const TargetRegisterInfo& TRI,
+           SlotIndex OldIdx, SlotIndex NewIdx, bool UpdateFlags)
+    : LIS(LIS), MRI(MRI), TRI(TRI), OldIdx(OldIdx), NewIdx(NewIdx),
+      UpdateFlags(UpdateFlags) {}
+
+  // FIXME: UpdateFlags is a workaround that creates live intervals for all
+  // physregs, even those that aren't needed for regalloc, in order to update
+  // kill flags. This is wasteful. Eventually, LiveVariables will strip all kill
+  // flags, and postRA passes will use a live register utility instead.
+  LiveInterval *getRegUnitLI(unsigned Unit) {
+    if (UpdateFlags)
+      return &LIS.getRegUnit(Unit);
+    return LIS.getCachedRegUnit(Unit);
   }
 
-  // Update intervals for all operands of MI to refer to BundleStart's
-  // SlotIndex.
-  void moveAllRangesInto(MachineInstr* MI, MachineInstr* BundleStart) {
-    if (MI == BundleStart)
-      return; // Bundling instr with itself - nothing to do.
-
-    SlotIndex OldIdx = LIS.getSlotIndexes()->getInstructionIndex(MI);
-    assert(LIS.getSlotIndexes()->getInstructionFromIndex(OldIdx) == MI &&
-           "SlotIndex <-> Instruction mapping broken for MI");
-
-    // Collect all ranges already in the bundle.
-    MachineBasicBlock::instr_iterator BII(BundleStart);
-    RangeSet Entering, Internal, Exiting;
-    bool hasRegMaskOp = false;
-    collectRanges(BII, Entering, Internal, Exiting, hasRegMaskOp, NewIdx);
-    assert(!hasRegMaskOp && "Can't have RegMask operand in bundle.");
-    for (++BII; &*BII == MI || BII->isInsideBundle(); ++BII) {
-      if (&*BII == MI)
+  /// Update all live ranges touched by MI, assuming a move from OldIdx to
+  /// NewIdx.
+  void updateAllRanges(MachineInstr *MI) {
+    DEBUG(dbgs() << "handleMove " << OldIdx << " -> " << NewIdx << ": " << *MI);
+    bool hasRegMask = false;
+    for (MIOperands MO(MI); MO.isValid(); ++MO) {
+      if (MO->isRegMask())
+        hasRegMask = true;
+      if (!MO->isReg())
         continue;
-      collectRanges(BII, Entering, Internal, Exiting, hasRegMaskOp, NewIdx);
-      assert(!hasRegMaskOp && "Can't have RegMask operand in bundle.");
-    }
-
-    BundleRanges BR = createBundleRanges(Entering, Internal, Exiting);
-
-    Entering.clear();
-    Internal.clear();
-    Exiting.clear();
-    collectRanges(MI, Entering, Internal, Exiting, hasRegMaskOp, OldIdx);
-    assert(!hasRegMaskOp && "Can't have RegMask operand in bundle.");
+      // Aggressively clear all kill flags.
+      // They are reinserted by VirtRegRewriter.
+      if (MO->isUse())
+        MO->setIsKill(false);
 
-    DEBUG(dbgs() << "Entering: " << Entering.size() << "\n");
-    DEBUG(dbgs() << "Internal: " << Internal.size() << "\n");
-    DEBUG(dbgs() << "Exiting: " << Exiting.size() << "\n");
-
-    moveAllEnteringFromInto(OldIdx, Entering, BR);
-    moveAllInternalFromInto(OldIdx, Internal, BR);
-    moveAllExitingFromInto(OldIdx, Exiting, BR);
-
-
-#ifndef NDEBUG
-    LIValidator validator;
-    validator = std::for_each(Entering.begin(), Entering.end(), validator);
-    validator = std::for_each(Internal.begin(), Internal.end(), validator);
-    validator = std::for_each(Exiting.begin(), Exiting.end(), validator);
-    assert(validator.rangesOk() && "moveAllOperandsInto broke liveness.");
-#endif
-  }
-
-private:
-
-#ifndef NDEBUG
-  class LIValidator {
-  private:
-    DenseSet<const LiveInterval*> Checked, Bogus;
-  public:
-    void operator()(const IntRangePair& P) {
-      const LiveInterval* LI = P.first;
-      if (Checked.count(LI))
-        return;
-      Checked.insert(LI);
-      if (LI->empty())
-        return;
-      SlotIndex LastEnd = LI->begin()->start;
-      for (LiveInterval::const_iterator LRI = LI->begin(), LRE = LI->end();
-           LRI != LRE; ++LRI) {
-        const LiveRange& LR = *LRI;
-        if (LastEnd > LR.start || LR.start >= LR.end)
-          Bogus.insert(LI);
-        LastEnd = LR.end;
-      }
-    }
-
-    bool rangesOk() const {
-      return Bogus.empty();
-    }
-  };
-#endif
-
-  // Collect IntRangePairs for all operands of MI that may need fixing.
-  // Treat's MI's index as OldIdx (regardless of what it is in SlotIndexes'
-  // maps).
-  void collectRanges(MachineInstr* MI, RangeSet& Entering, RangeSet& Internal,
-                     RangeSet& Exiting, bool& hasRegMaskOp, SlotIndex OldIdx) {
-    hasRegMaskOp = false;
-    for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
-                                    MOE = MI->operands_end();
-         MOI != MOE; ++MOI) {
-      const MachineOperand& MO = *MOI;
-
-      if (MO.isRegMask()) {
-        hasRegMaskOp = true;
+      unsigned Reg = MO->getReg();
+      if (!Reg)
         continue;
-      }
-
-      if (!MO.isReg() || MO.getReg() == 0)
-        continue;
-
-      unsigned Reg = MO.getReg();
-
-      // TODO: Currently we're skipping uses that are reserved or have no
-      // interval, but we're not updating their kills. This should be
-      // fixed.
-      if (TargetRegisterInfo::isPhysicalRegister(Reg) && LIS.isReserved(Reg))
+      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+        updateRange(LIS.getInterval(Reg));
         continue;
-
-      // Collect ranges for register units. These live ranges are computed on
-      // demand, so just skip any that haven't been computed yet.
-      if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
-        for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units)
-          if (LiveInterval *LI = LIS.getCachedRegUnit(*Units))
-            collectRanges(MO, LI, Entering, Internal, Exiting, OldIdx);
-      } else {
-        // Collect ranges for individual virtual registers.
-        collectRanges(MO, &LIS.getInterval(Reg),
-                      Entering, Internal, Exiting, OldIdx);
       }
+
+      // For physregs, only update the regunits that actually have a
+      // precomputed live range.
+      for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units)
+        if (LiveInterval *LI = getRegUnitLI(*Units))
+          updateRange(*LI);
     }
+    if (hasRegMask)
+      updateRegMaskSlots();
   }
 
-  void collectRanges(const MachineOperand &MO, LiveInterval *LI,
-                     RangeSet &Entering, RangeSet &Internal, RangeSet &Exiting,
-                     SlotIndex OldIdx) {
-    if (MO.readsReg()) {
-      LiveRange* LR = LI->getLiveRangeContaining(OldIdx);
-      if (LR != 0)
-        Entering.insert(std::make_pair(LI, LR));
-    }
-    if (MO.isDef()) {
-      LiveRange* LR = LI->getLiveRangeContaining(OldIdx.getRegSlot());
-      assert(LR != 0 && "No live range for def?");
-      if (LR->end > OldIdx.getDeadSlot())
-        Exiting.insert(std::make_pair(LI, LR));
+private:
+  /// Update a single live range, assuming an instruction has been moved from
+  /// OldIdx to NewIdx.
+  void updateRange(LiveInterval &LI) {
+    if (!Updated.insert(&LI))
+      return;
+    DEBUG({
+      dbgs() << "     ";
+      if (TargetRegisterInfo::isVirtualRegister(LI.reg))
+        dbgs() << PrintReg(LI.reg);
       else
-        Internal.insert(std::make_pair(LI, LR));
-    }
+        dbgs() << PrintRegUnit(LI.reg, &TRI);
+      dbgs() << ":\t" << LI << '\n';
+    });
+    if (SlotIndex::isEarlierInstr(OldIdx, NewIdx))
+      handleMoveDown(LI);
+    else
+      handleMoveUp(LI);
+    DEBUG(dbgs() << "        -->\t" << LI << '\n');
+    LI.verify();
   }
 
-  BundleRanges createBundleRanges(RangeSet& Entering,
-                                  RangeSet& Internal,
-                                  RangeSet& Exiting) {
-    BundleRanges BR;
-
-    for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end();
-         EI != EE; ++EI) {
-      LiveInterval* LI = EI->first;
-      LiveRange* LR = EI->second;
-      BR[LI->reg].Use = LR;
-    }
+  /// Update LI to reflect an instruction has been moved downwards from OldIdx
+  /// to NewIdx.
+  ///
+  /// 1. Live def at OldIdx:
+  ///    Move def to NewIdx, assert endpoint after NewIdx.
+  ///
+  /// 2. Live def at OldIdx, killed at NewIdx:
+  ///    Change to dead def at NewIdx.
+  ///    (Happens when bundling def+kill together).
+  ///
+  /// 3. Dead def at OldIdx:
+  ///    Move def to NewIdx, possibly across another live value.
+  ///
+  /// 4. Def at OldIdx AND at NewIdx:
+  ///    Remove live range [OldIdx;NewIdx) and value defined at OldIdx.
+  ///    (Happens when bundling multiple defs together).
+  ///
+  /// 5. Value read at OldIdx, killed before NewIdx:
+  ///    Extend kill to NewIdx.
+  ///
+  void handleMoveDown(LiveInterval &LI) {
+    // First look for a kill at OldIdx.
+    LiveInterval::iterator I = LI.find(OldIdx.getBaseIndex());
+    LiveInterval::iterator E = LI.end();
+    // Is LI even live at OldIdx?
+    if (I == E || SlotIndex::isEarlierInstr(OldIdx, I->start))
+      return;
 
-    for (RangeSet::iterator II = Internal.begin(), IE = Internal.end();
-         II != IE; ++II) {
-      LiveInterval* LI = II->first;
-      LiveRange* LR = II->second;
-      if (LR->end.isDead()) {
-        BR[LI->reg].Dead = LR;
-      } else {
-        BR[LI->reg].EC = LR;
-      }
+    // Handle a live-in value.
+    if (!SlotIndex::isSameInstr(I->start, OldIdx)) {
+      bool isKill = SlotIndex::isSameInstr(OldIdx, I->end);
+      // If the live-in value already extends to NewIdx, there is nothing to do.
+      if (!SlotIndex::isEarlierInstr(I->end, NewIdx))
+        return;
+      // Aggressively remove all kill flags from the old kill point.
+      // Kill flags shouldn't be used while live intervals exist, they will be
+      // reinserted by VirtRegRewriter.
+      if (MachineInstr *KillMI = LIS.getInstructionFromIndex(I->end))
+        for (MIBundleOperands MO(KillMI); MO.isValid(); ++MO)
+          if (MO->isReg() && MO->isUse())
+            MO->setIsKill(false);
+      // Adjust I->end to reach NewIdx. This may temporarily make LI invalid by
+      // overlapping ranges. Case 5 above.
+      I->end = NewIdx.getRegSlot(I->end.isEarlyClobber());
+      // If this was a kill, there may also be a def. Otherwise we're done.
+      if (!isKill)
+        return;
+      ++I;
     }
 
-    for (RangeSet::iterator EI = Exiting.begin(), EE = Exiting.end();
-         EI != EE; ++EI) {
-      LiveInterval* LI = EI->first;
-      LiveRange* LR = EI->second;
-      BR[LI->reg].Def = LR;
+    // Check for a def at OldIdx.
+    if (I == E || !SlotIndex::isSameInstr(OldIdx, I->start))
+      return;
+    // We have a def at OldIdx.
+    VNInfo *DefVNI = I->valno;
+    assert(DefVNI->def == I->start && "Inconsistent def");
+    DefVNI->def = NewIdx.getRegSlot(I->start.isEarlyClobber());
+    // If the defined value extends beyond NewIdx, just move the def down.
+    // This is case 1 above.
+    if (SlotIndex::isEarlierInstr(NewIdx, I->end)) {
+      I->start = DefVNI->def;
+      return;
     }
-
-    return BR;
-  }
-
-  void moveKillFlags(unsigned reg, SlotIndex OldIdx, SlotIndex newKillIdx) {
-    MachineInstr* OldKillMI = LIS.getInstructionFromIndex(OldIdx);
-    if (!OldKillMI->killsRegister(reg))
-      return; // Bail out if we don't have kill flags on the old register.
-    MachineInstr* NewKillMI = LIS.getInstructionFromIndex(newKillIdx);
-    assert(OldKillMI->killsRegister(reg) && "Old 'kill' instr isn't a kill.");
-    assert(!NewKillMI->killsRegister(reg) &&
-           "New kill instr is already a kill.");
-    OldKillMI->clearRegisterKills(reg, &TRI);
-    NewKillMI->addRegisterKilled(reg, &TRI);
-  }
-
-  void updateRegMaskSlots(SlotIndex OldIdx) {
-    SmallVectorImpl<SlotIndex>::iterator RI =
-      std::lower_bound(LIS.RegMaskSlots.begin(), LIS.RegMaskSlots.end(),
-                       OldIdx);
-    assert(*RI == OldIdx && "No RegMask at OldIdx.");
-    *RI = NewIdx;
-    assert(*prior(RI) < *RI && *RI < *next(RI) &&
-           "RegSlots out of order. Did you move one call across another?");
-  }
-
-  // Return the last use of reg between NewIdx and OldIdx.
-  SlotIndex findLastUseBefore(unsigned Reg, SlotIndex OldIdx) {
-    SlotIndex LastUse = NewIdx;
-    for (MachineRegisterInfo::use_nodbg_iterator
-           UI = MRI.use_nodbg_begin(Reg),
-           UE = MRI.use_nodbg_end();
-         UI != UE; UI.skipInstruction()) {
-      const MachineInstr* MI = &*UI;
-      SlotIndex InstSlot = LIS.getSlotIndexes()->getInstructionIndex(MI);
-      if (InstSlot > LastUse && InstSlot < OldIdx)
-        LastUse = InstSlot;
+    // The remaining possibilities are now:
+    // 2. Live def at OldIdx, killed at NewIdx: isSameInstr(I->end, NewIdx).
+    // 3. Dead def at OldIdx: I->end = OldIdx.getDeadSlot().
+    // In either case, it is possible that there is an existing def at NewIdx.
+    assert((I->end == OldIdx.getDeadSlot() ||
+            SlotIndex::isSameInstr(I->end, NewIdx)) &&
+            "Cannot move def below kill");
+    LiveInterval::iterator NewI = LI.advanceTo(I, NewIdx.getRegSlot());
+    if (NewI != E && SlotIndex::isSameInstr(NewI->start, NewIdx)) {
+      // There is an existing def at NewIdx, case 4 above. The def at OldIdx is
+      // coalesced into that value.
+      assert(NewI->valno != DefVNI && "Multiple defs of value?");
+      LI.removeValNo(DefVNI);
+      return;
     }
-    return LastUse;
+    // There was no existing def at NewIdx. Turn *I into a dead def at NewIdx.
+    // If the def at OldIdx was dead, we allow it to be moved across other LI
+    // values. The new range should be placed immediately before NewI, move any
+    // intermediate ranges up.
+    assert(NewI != I && "Inconsistent iterators");
+    std::copy(llvm::next(I), NewI, I);
+    *llvm::prior(NewI) = LiveRange(DefVNI->def, NewIdx.getDeadSlot(), DefVNI);
   }
 
-  void moveEnteringUpFrom(SlotIndex OldIdx, IntRangePair& P) {
-    LiveInterval* LI = P.first;
-    LiveRange* LR = P.second;
-    bool LiveThrough = LR->end > OldIdx.getRegSlot();
-    if (LiveThrough)
+  /// Update LI to reflect an instruction has been moved upwards from OldIdx
+  /// to NewIdx.
+  ///
+  /// 1. Live def at OldIdx:
+  ///    Hoist def to NewIdx.
+  ///
+  /// 2. Dead def at OldIdx:
+  ///    Hoist def+end to NewIdx, possibly move across other values.
+  ///
+  /// 3. Dead def at OldIdx AND existing def at NewIdx:
+  ///    Remove value defined at OldIdx, coalescing it with existing value.
+  ///
+  /// 4. Live def at OldIdx AND existing def at NewIdx:
+  ///    Remove value defined at NewIdx, hoist OldIdx def to NewIdx.
+  ///    (Happens when bundling multiple defs together).
+  ///
+  /// 5. Value killed at OldIdx:
+  ///    Hoist kill to NewIdx, then scan for last kill between NewIdx and
+  ///    OldIdx.
+  ///
+  void handleMoveUp(LiveInterval &LI) {
+    // First look for a kill at OldIdx.
+    LiveInterval::iterator I = LI.find(OldIdx.getBaseIndex());
+    LiveInterval::iterator E = LI.end();
+    // Is LI even live at OldIdx?
+    if (I == E || SlotIndex::isEarlierInstr(OldIdx, I->start))
       return;
-    SlotIndex LastUse = findLastUseBefore(LI->reg, OldIdx);
-    if (LastUse != NewIdx)
-      moveKillFlags(LI->reg, NewIdx, LastUse);
-    LR->end = LastUse.getRegSlot();
-  }
 
-  void moveEnteringDownFrom(SlotIndex OldIdx, IntRangePair& P) {
-    LiveInterval* LI = P.first;
-    LiveRange* LR = P.second;
-    // Extend the LiveRange if NewIdx is past the end.
-    if (NewIdx > LR->end) {
-      // Move kill flags if OldIdx was not originally the end
-      // (otherwise LR->end points to an invalid slot).
-      if (LR->end.getRegSlot() != OldIdx.getRegSlot()) {
-        assert(LR->end > OldIdx && "LiveRange does not cover original slot");
-        moveKillFlags(LI->reg, LR->end, NewIdx);
+    // Handle a live-in value.
+    if (!SlotIndex::isSameInstr(I->start, OldIdx)) {
+      // If the live-in value isn't killed here, there is nothing to do.
+      if (!SlotIndex::isSameInstr(OldIdx, I->end))
+        return;
+      // Adjust I->end to end at NewIdx. If we are hoisting a kill above
+      // another use, we need to search for that use. Case 5 above.
+      I->end = NewIdx.getRegSlot(I->end.isEarlyClobber());
+      ++I;
+      // If OldIdx also defines a value, there couldn't have been another use.
+      if (I == E || !SlotIndex::isSameInstr(I->start, OldIdx)) {
+        // No def, search for the new kill.
+        // This can never be an early clobber kill since there is no def.
+        llvm::prior(I)->end = findLastUseBefore(LI.reg).getRegSlot();
+        return;
       }
-      LR->end = NewIdx.getRegSlot();
-    }
-  }
-
-  void moveAllEnteringFrom(SlotIndex OldIdx, RangeSet& Entering) {
-    bool GoingUp = NewIdx < OldIdx;
-
-    if (GoingUp) {
-      for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end();
-           EI != EE; ++EI)
-        moveEnteringUpFrom(OldIdx, *EI);
-    } else {
-      for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end();
-           EI != EE; ++EI)
-        moveEnteringDownFrom(OldIdx, *EI);
     }
-  }
-
-  void moveInternalFrom(SlotIndex OldIdx, IntRangePair& P) {
-    LiveInterval* LI = P.first;
-    LiveRange* LR = P.second;
-    assert(OldIdx < LR->start && LR->start < OldIdx.getDeadSlot() &&
-           LR->end <= OldIdx.getDeadSlot() &&
-           "Range should be internal to OldIdx.");
-    LiveRange Tmp(*LR);
-    Tmp.start = NewIdx.getRegSlot(LR->start.isEarlyClobber());
-    Tmp.valno->def = Tmp.start;
-    Tmp.end = LR->end.isDead() ? NewIdx.getDeadSlot() : NewIdx.getRegSlot();
-    LI->removeRange(*LR);
-    LI->addRange(Tmp);
-  }
-
-  void moveAllInternalFrom(SlotIndex OldIdx, RangeSet& Internal) {
-    for (RangeSet::iterator II = Internal.begin(), IE = Internal.end();
-         II != IE; ++II)
-      moveInternalFrom(OldIdx, *II);
-  }
-
-  void moveExitingFrom(SlotIndex OldIdx, IntRangePair& P) {
-    LiveRange* LR = P.second;
-    assert(OldIdx < LR->start && LR->start < OldIdx.getDeadSlot() &&
-           "Range should start in OldIdx.");
-    assert(LR->end > OldIdx.getDeadSlot() && "Range should exit OldIdx.");
-    SlotIndex NewStart = NewIdx.getRegSlot(LR->start.isEarlyClobber());
-    LR->start = NewStart;
-    LR->valno->def = NewStart;
-  }
-
-  void moveAllExitingFrom(SlotIndex OldIdx, RangeSet& Exiting) {
-    for (RangeSet::iterator EI = Exiting.begin(), EE = Exiting.end();
-         EI != EE; ++EI)
-      moveExitingFrom(OldIdx, *EI);
-  }
 
-  void moveEnteringUpFromInto(SlotIndex OldIdx, IntRangePair& P,
-                              BundleRanges& BR) {
-    LiveInterval* LI = P.first;
-    LiveRange* LR = P.second;
-    bool LiveThrough = LR->end > OldIdx.getRegSlot();
-    if (LiveThrough) {
-      assert((LR->start < NewIdx || BR[LI->reg].Def == LR) &&
-             "Def in bundle should be def range.");
-      assert((BR[LI->reg].Use == 0 || BR[LI->reg].Use == LR) &&
-             "If bundle has use for this reg it should be LR.");
-      BR[LI->reg].Use = LR;
+    // Now deal with the def at OldIdx.
+    assert(I != E && SlotIndex::isSameInstr(I->start, OldIdx) && "No def?");
+    VNInfo *DefVNI = I->valno;
+    assert(DefVNI->def == I->start && "Inconsistent def");
+    DefVNI->def = NewIdx.getRegSlot(I->start.isEarlyClobber());
+
+    // Check for an existing def at NewIdx.
+    LiveInterval::iterator NewI = LI.find(NewIdx.getRegSlot());
+    if (SlotIndex::isSameInstr(NewI->start, NewIdx)) {
+      assert(NewI->valno != DefVNI && "Same value defined more than once?");
+      // There is an existing def at NewIdx.
+      if (I->end.isDead()) {
+        // Case 3: Remove the dead def at OldIdx.
+        LI.removeValNo(DefVNI);
+        return;
+      }
+      // Case 4: Replace def at NewIdx with live def at OldIdx.
+      I->start = DefVNI->def;
+      LI.removeValNo(NewI->valno);
       return;
     }
 
-    SlotIndex LastUse = findLastUseBefore(LI->reg, OldIdx);
-    moveKillFlags(LI->reg, OldIdx, LastUse);
-
-    if (LR->start < NewIdx) {
-      // Becoming a new entering range.
-      assert(BR[LI->reg].Dead == 0 && BR[LI->reg].Def == 0 &&
-             "Bundle shouldn't be re-defining reg mid-range.");
-      assert((BR[LI->reg].Use == 0 || BR[LI->reg].Use == LR) &&
-             "Bundle shouldn't have different use range for same reg.");
-      LR->end = LastUse.getRegSlot();
-      BR[LI->reg].Use = LR;
-    } else {
-      // Becoming a new Dead-def.
-      assert(LR->start == NewIdx.getRegSlot(LR->start.isEarlyClobber()) &&
-             "Live range starting at unexpected slot.");
-      assert(BR[LI->reg].Def == LR && "Reg should have def range.");
-      assert(BR[LI->reg].Dead == 0 &&
-               "Can't have def and dead def of same reg in a bundle.");
-      LR->end = LastUse.getDeadSlot();
-      BR[LI->reg].Dead = BR[LI->reg].Def;
-      BR[LI->reg].Def = 0;
-    }
-  }
-
-  void moveEnteringDownFromInto(SlotIndex OldIdx, IntRangePair& P,
-                                BundleRanges& BR) {
-    LiveInterval* LI = P.first;
-    LiveRange* LR = P.second;
-    if (NewIdx > LR->end) {
-      // Range extended to bundle. Add to bundle uses.
-      // Note: Currently adds kill flags to bundle start.
-      assert(BR[LI->reg].Use == 0 &&
-             "Bundle already has use range for reg.");
-      moveKillFlags(LI->reg, LR->end, NewIdx);
-      LR->end = NewIdx.getRegSlot();
-      BR[LI->reg].Use = LR;
-    } else {
-      assert(BR[LI->reg].Use != 0 &&
-             "Bundle should already have a use range for reg.");
-    }
-  }
-
-  void moveAllEnteringFromInto(SlotIndex OldIdx, RangeSet& Entering,
-                               BundleRanges& BR) {
-    bool GoingUp = NewIdx < OldIdx;
-
-    if (GoingUp) {
-      for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end();
-           EI != EE; ++EI)
-        moveEnteringUpFromInto(OldIdx, *EI, BR);
-    } else {
-      for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end();
-           EI != EE; ++EI)
-        moveEnteringDownFromInto(OldIdx, *EI, BR);
+    // There is no existing def at NewIdx. Hoist DefVNI.
+    if (!I->end.isDead()) {
+      // Leave the end point of a live def.
+      I->start = DefVNI->def;
+      return;
     }
-  }
 
-  void moveInternalFromInto(SlotIndex OldIdx, IntRangePair& P,
-                            BundleRanges& BR) {
-    // TODO: Sane rules for moving ranges into bundles.
+    // DefVNI is a dead def. It may have been moved across other values in LI,
+    // so move I up to NewI. Slide [NewI;I) down one position.
+    std::copy_backward(NewI, I, llvm::next(I));
+    *NewI = LiveRange(DefVNI->def, NewIdx.getDeadSlot(), DefVNI);
   }
 
-  void moveAllInternalFromInto(SlotIndex OldIdx, RangeSet& Internal,
-                               BundleRanges& BR) {
-    for (RangeSet::iterator II = Internal.begin(), IE = Internal.end();
-         II != IE; ++II)
-      moveInternalFromInto(OldIdx, *II, BR);
+  void updateRegMaskSlots() {
+    SmallVectorImpl<SlotIndex>::iterator RI =
+      std::lower_bound(LIS.RegMaskSlots.begin(), LIS.RegMaskSlots.end(),
+                       OldIdx);
+    assert(RI != LIS.RegMaskSlots.end() && *RI == OldIdx.getRegSlot() &&
+           "No RegMask at OldIdx.");
+    *RI = NewIdx.getRegSlot();
+    assert((RI == LIS.RegMaskSlots.begin() ||
+            SlotIndex::isEarlierInstr(*llvm::prior(RI), *RI)) &&
+            "Cannot move regmask instruction above another call");
+    assert((llvm::next(RI) == LIS.RegMaskSlots.end() ||
+            SlotIndex::isEarlierInstr(*RI, *llvm::next(RI))) &&
+            "Cannot move regmask instruction below another call");
   }
 
-  void moveExitingFromInto(SlotIndex OldIdx, IntRangePair& P,
-                           BundleRanges& BR) {
-    LiveInterval* LI = P.first;
-    LiveRange* LR = P.second;
-
-    assert(LR->start.isRegister() &&
-           "Don't know how to merge exiting ECs into bundles yet.");
+  // Return the last use of reg between NewIdx and OldIdx.
+  SlotIndex findLastUseBefore(unsigned Reg) {
+    SlotIndex LastUse = NewIdx;
 
-    if (LR->end > NewIdx.getDeadSlot()) {
-      // This range is becoming an exiting range on the bundle.
-      // If there was an old dead-def of this reg, delete it.
-      if (BR[LI->reg].Dead != 0) {
-        LI->removeRange(*BR[LI->reg].Dead);
-        BR[LI->reg].Dead = 0;
+    if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+      for (MachineRegisterInfo::use_nodbg_iterator
+             UI = MRI.use_nodbg_begin(Reg),
+             UE = MRI.use_nodbg_end();
+           UI != UE; UI.skipInstruction()) {
+        const MachineInstr* MI = &*UI;
+        SlotIndex InstSlot = LIS.getSlotIndexes()->getInstructionIndex(MI);
+        if (InstSlot > LastUse && InstSlot < OldIdx)
+          LastUse = InstSlot;
       }
-      assert(BR[LI->reg].Def == 0 &&
-             "Can't have two defs for the same variable exiting a bundle.");
-      LR->start = NewIdx.getRegSlot();
-      LR->valno->def = LR->start;
-      BR[LI->reg].Def = LR;
     } else {
-      // This range is becoming internal to the bundle.
-      assert(LR->end == NewIdx.getRegSlot() &&
-             "Can't bundle def whose kill is before the bundle");
-      if (BR[LI->reg].Dead || BR[LI->reg].Def) {
-        // Already have a def for this. Just delete range.
-        LI->removeRange(*LR);
-      } else {
-        // Make range dead, record.
-        LR->end = NewIdx.getDeadSlot();
-        BR[LI->reg].Dead = LR;
-        assert(BR[LI->reg].Use == LR &&
-               "Range becoming dead should currently be use.");
+      MachineInstr* MI = LIS.getSlotIndexes()->getInstructionFromIndex(NewIdx);
+      MachineBasicBlock::iterator MII(MI);
+      ++MII;
+      MachineBasicBlock* MBB = MI->getParent();
+      for (; MII != MBB->end() && LIS.getInstructionIndex(MII) < OldIdx; ++MII){
+        for (MachineInstr::mop_iterator MOI = MII->operands_begin(),
+                                        MOE = MII->operands_end();
+             MOI != MOE; ++MOI) {
+          const MachineOperand& mop = *MOI;
+          if (!mop.isReg() || mop.getReg() == 0 ||
+              TargetRegisterInfo::isVirtualRegister(mop.getReg()))
+            continue;
+
+          if (TRI.hasRegUnit(mop.getReg(), Reg))
+            LastUse = LIS.getInstructionIndex(MII);
+        }
       }
-      // In both cases the range is no longer a use on the bundle.
-      BR[LI->reg].Use = 0;
     }
+    return LastUse;
   }
-
-  void moveAllExitingFromInto(SlotIndex OldIdx, RangeSet& Exiting,
-                              BundleRanges& BR) {
-    for (RangeSet::iterator EI = Exiting.begin(), EE = Exiting.end();
-         EI != EE; ++EI)
-      moveExitingFromInto(OldIdx, *EI, BR);
-  }
-
 };
 
-void LiveIntervals::handleMove(MachineInstr* MI) {
+void LiveIntervals::handleMove(MachineInstr* MI, bool UpdateFlags) {
+  assert(!MI->isBundled() && "Can't handle bundled instructions yet.");
   SlotIndex OldIndex = Indexes->getInstructionIndex(MI);
   Indexes->removeMachineInstrFromMaps(MI);
-  SlotIndex NewIndex = MI->isInsideBundle() ?
-                        Indexes->getInstructionIndex(MI) :
-                        Indexes->insertMachineInstrInMaps(MI);
+  SlotIndex NewIndex = Indexes->insertMachineInstrInMaps(MI);
   assert(getMBBStartIdx(MI->getParent()) <= OldIndex &&
          OldIndex < getMBBEndIdx(MI->getParent()) &&
          "Cannot handle moves across basic block boundaries.");
-  assert(!MI->isBundled() && "Can't handle bundled instructions yet.");
 
-  HMEditor HME(*this, *MRI, *TRI, NewIndex);
-  HME.moveAllRangesFrom(MI, OldIndex);
+  HMEditor HME(*this, *MRI, *TRI, OldIndex, NewIndex, UpdateFlags);
+  HME.updateAllRanges(MI);
 }
 
 void LiveIntervals::handleMoveIntoBundle(MachineInstr* MI,
-                                         MachineInstr* BundleStart) {
+                                         MachineInstr* BundleStart,
+                                         bool UpdateFlags) {
+  SlotIndex OldIndex = Indexes->getInstructionIndex(MI);
   SlotIndex NewIndex = Indexes->getInstructionIndex(BundleStart);
-  HMEditor HME(*this, *MRI, *TRI, NewIndex);
-  HME.moveAllRangesInto(MI, BundleStart);
+  HMEditor HME(*this, *MRI, *TRI, OldIndex, NewIndex, UpdateFlags);
+  HME.updateAllRanges(MI);
 }
diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalUnion.h b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.h
index cd4e690..4d41fca 100644
--- a/contrib/llvm/lib/CodeGen/LiveIntervalUnion.h
+++ b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.h
@@ -178,8 +178,8 @@ public:
     bool checkLoopInterference(MachineLoopRange*);
 
   private:
-    Query(const Query&);          // DO NOT IMPLEMENT
-    void operator=(const Query&); // DO NOT IMPLEMENT
+    Query(const Query&) LLVM_DELETED_FUNCTION;
+    void operator=(const Query&) LLVM_DELETED_FUNCTION;
   };
 
   // Array of LiveIntervalUnions.
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
index d828f25..c3ff4f1 100644
--- a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
@@ -65,7 +65,11 @@ void LiveRangeCalc::extendToUses(LiveInterval *LI, unsigned Reg) {
   // Visit all operands that read Reg. This may include partial defs.
   for (MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(Reg),
        E = MRI->reg_nodbg_end(); I != E; ++I) {
-    const MachineOperand &MO = I.getOperand();
+    MachineOperand &MO = I.getOperand();
+    // Clear all kill flags. They will be reinserted after register allocation
+    // by LiveIntervalAnalysis::addKillFlags().
+    if (MO.isUse())
+      MO.setIsKill(false);
     if (!MO.readsReg())
       continue;
     // MI is reading Reg. We may have visited MI before if it happens to be
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
index b4ce9aa..f8fbc7d 100644
--- a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -87,7 +87,7 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
 
     // We can't remat physreg uses, unless it is a constant.
     if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
-      if (MRI.isConstantPhysReg(MO.getReg(), VRM->getMachineFunction()))
+      if (MRI.isConstantPhysReg(MO.getReg(), *OrigMI->getParent()->getParent()))
         continue;
       return false;
     }
@@ -96,6 +96,13 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
     const VNInfo *OVNI = li.getVNInfoAt(OrigIdx);
     if (!OVNI)
       continue;
+
+    // Don't allow rematerialization immediately after the original def.
+    // It would be incorrect if OrigMI redefines the register.
+    // See PR14098.
+    if (SlotIndex::isSameInstr(OrigIdx, UseIdx))
+      return false;
+
     if (OVNI != li.getVNInfoAt(UseIdx))
       return false;
   }
@@ -249,7 +256,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
         unsigned Reg = MOI->getReg();
         if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
           // Check if MI reads any unreserved physregs.
-          if (Reg && MOI->readsReg() && !LIS.isReserved(Reg))
+          if (Reg && MOI->readsReg() && !MRI.isReserved(Reg))
             ReadsPhysRegs = true;
           continue;
         }
diff --git a/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp b/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp
index cdb1776..7f22478 100644
--- a/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp
@@ -13,6 +13,7 @@
 
 #define DEBUG_TYPE "regalloc"
 #include "LiveRegMatrix.h"
+#include "RegisterCoalescer.h"
 #include "VirtRegMap.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -117,8 +118,9 @@ bool LiveRegMatrix::checkRegUnitInterference(LiveInterval &VirtReg,
                                              unsigned PhysReg) {
   if (VirtReg.empty())
     return false;
+  CoalescerPair CP(VirtReg.reg, PhysReg, *TRI);
   for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
-    if (VirtReg.overlaps(LIS->getRegUnit(*Units)))
+    if (VirtReg.overlaps(LIS->getRegUnit(*Units), CP, *LIS->getSlotIndexes()))
       return true;
   return false;
 }
diff --git a/contrib/llvm/lib/CodeGen/LiveRegMatrix.h b/contrib/llvm/lib/CodeGen/LiveRegMatrix.h
index b3e2d7f..8f22c24 100644
--- a/contrib/llvm/lib/CodeGen/LiveRegMatrix.h
+++ b/contrib/llvm/lib/CodeGen/LiveRegMatrix.h
@@ -15,7 +15,7 @@
 // Register units are defined in MCRegisterInfo.h, they represent the smallest
 // unit of interference when dealing with overlapping physical registers. The
 // LiveRegMatrix is represented as a LiveIntervalUnion per register unit. When
-// a virtual register is assigned to a physicval register, the live range for
+// a virtual register is assigned to a physical register, the live range for
 // the virtual register is inserted into the LiveIntervalUnion for each regunit
 // in the physreg.
 //
diff --git a/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp
index 939e795..f0b522b 100644
--- a/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp
@@ -25,7 +25,10 @@
 using namespace llvm;
 
 char LiveStacks::ID = 0;
-INITIALIZE_PASS(LiveStacks, "livestacks",
+INITIALIZE_PASS_BEGIN(LiveStacks, "livestacks",
+                "Live Stack Slot Analysis", false, false)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_END(LiveStacks, "livestacks",
                 "Live Stack Slot Analysis", false, false)
 
 char &llvm::LiveStacksID = LiveStacks::ID;
diff --git a/contrib/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm/lib/CodeGen/LiveVariables.cpp
index 348ed3a..6ea933d 100644
--- a/contrib/llvm/lib/CodeGen/LiveVariables.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveVariables.cpp
@@ -65,6 +65,7 @@ LiveVariables::VarInfo::findKill(const MachineBasicBlock *MBB) const {
 }
 
 void LiveVariables::VarInfo::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   dbgs() << "  Alive in blocks: ";
   for (SparseBitVector<>::iterator I = AliveBlocks.begin(),
            E = AliveBlocks.end(); I != E; ++I)
@@ -77,6 +78,7 @@ void LiveVariables::VarInfo::dump() const {
       dbgs() << "\n    #" << i << ": " << *Kills[i];
     dbgs() << "\n";
   }
+#endif
 }
 
 /// getVarInfo - Get (possibly creating) a VarInfo object for the given vreg.
@@ -501,8 +503,6 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
   MRI = &mf.getRegInfo();
   TRI = MF->getTarget().getRegisterInfo();
 
-  ReservedRegisters = TRI->getReservedRegs(mf);
-
   unsigned NumRegs = TRI->getNumRegs();
   PhysRegDef  = new MachineInstr*[NumRegs];
   PhysRegUse  = new MachineInstr*[NumRegs];
@@ -586,7 +586,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
         unsigned MOReg = UseRegs[i];
         if (TargetRegisterInfo::isVirtualRegister(MOReg))
           HandleVirtRegUse(MOReg, MBB, MI);
-        else if (!ReservedRegisters[MOReg])
+        else if (!MRI->isReserved(MOReg))
           HandlePhysRegUse(MOReg, MI);
       }
 
@@ -599,7 +599,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
         unsigned MOReg = DefRegs[i];
         if (TargetRegisterInfo::isVirtualRegister(MOReg))
           HandleVirtRegDef(MOReg, MI);
-        else if (!ReservedRegisters[MOReg])
+        else if (!MRI->isReserved(MOReg))
           HandlePhysRegDef(MOReg, MI, Defs);
       }
       UpdatePhysRegDefs(MI, Defs);
@@ -806,18 +806,44 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB,
                                 MachineBasicBlock *SuccBB) {
   const unsigned NumNew = BB->getNumber();
 
-  // All registers used by PHI nodes in SuccBB must be live through BB.
-  for (MachineBasicBlock::iterator BBI = SuccBB->begin(),
-         BBE = SuccBB->end(); BBI != BBE && BBI->isPHI(); ++BBI)
+  SmallSet<unsigned, 16> Defs, Kills;
+
+  MachineBasicBlock::iterator BBI = SuccBB->begin(), BBE = SuccBB->end();
+  for (; BBI != BBE && BBI->isPHI(); ++BBI) {
+    // Record the def of the PHI node.
+    Defs.insert(BBI->getOperand(0).getReg());
+
+    // All registers used by PHI nodes in SuccBB must be live through BB.
     for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
       if (BBI->getOperand(i+1).getMBB() == BB)
         getVarInfo(BBI->getOperand(i).getReg()).AliveBlocks.set(NumNew);
+  }
+
+  // Record all vreg defs and kills of all instructions in SuccBB.
+  for (; BBI != BBE; ++BBI) {
+    for (MachineInstr::mop_iterator I = BBI->operands_begin(),
+         E = BBI->operands_end(); I != E; ++I) {
+      if (I->isReg() && TargetRegisterInfo::isVirtualRegister(I->getReg())) {
+        if (I->isDef())
+          Defs.insert(I->getReg());
+        else if (I->isKill())
+          Kills.insert(I->getReg());
+      }
+    }
+  }
 
   // Update info for all live variables
   for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
     unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+
+    // If the Defs is defined in the successor it can't be live in BB.
+    if (Defs.count(Reg))
+      continue;
+
+    // If the register is either killed in or live through SuccBB it's also live
+    // through BB.
     VarInfo &VI = getVarInfo(Reg);
-    if (!VI.AliveBlocks.test(NumNew) && VI.isLiveIn(*SuccBB, Reg, *MRI))
+    if (Kills.count(Reg) || VI.AliveBlocks.test(SuccBB->getNumber()))
       VI.AliveBlocks.set(NumNew);
   }
 }
diff --git a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
index fa6b450..18d021d 100644
--- a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -21,7 +21,7 @@
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Assembly/Writer.h"
@@ -145,7 +145,8 @@ MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() {
   instr_iterator I = instr_begin(), E = instr_end();
   while (I != E && I->isPHI())
     ++I;
-  assert(!I->isInsideBundle() && "First non-phi MI cannot be inside a bundle!");
+  assert((I == E || !I->isInsideBundle()) &&
+         "First non-phi MI cannot be inside a bundle!");
   return I;
 }
 
@@ -156,7 +157,7 @@ MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) {
     ++I;
   // FIXME: This needs to change if we wish to bundle labels / dbg_values
   // inside the bundle.
-  assert(!I->isInsideBundle() &&
+  assert((I == E || !I->isInsideBundle()) &&
          "First non-phi / non-label instruction is inside a bundle!");
   return I;
 }
@@ -228,9 +229,11 @@ const MachineBasicBlock *MachineBasicBlock::getLandingPadSuccessor() const {
   return 0;
 }
 
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void MachineBasicBlock::dump() const {
   print(dbgs());
 }
+#endif
 
 StringRef MachineBasicBlock::getName() const {
   if (const BasicBlock *LBB = getBasicBlock())
@@ -243,7 +246,7 @@ StringRef MachineBasicBlock::getName() const {
 std::string MachineBasicBlock::getFullName() const {
   std::string Name;
   if (getParent())
-    Name = (getParent()->getFunction()->getName() + ":").str();
+    Name = (getParent()->getName() + ":").str();
   if (getBasicBlock())
     Name += getBasicBlock()->getName();
   else
@@ -942,12 +945,11 @@ MachineBasicBlock::findDebugLoc(instr_iterator MBBI) {
 
 /// getSuccWeight - Return weight of the edge from this block to MBB.
 ///
-uint32_t MachineBasicBlock::getSuccWeight(const MachineBasicBlock *succ) const {
+uint32_t MachineBasicBlock::getSuccWeight(const_succ_iterator Succ) const {
   if (Weights.empty())
     return 0;
 
-  const_succ_iterator I = std::find(Successors.begin(), Successors.end(), succ);
-  return *getWeightIterator(I);
+  return *getWeightIterator(Succ);
 }
 
 /// getWeightIterator - Return wight iterator corresonding to the I successor
@@ -970,6 +972,80 @@ getWeightIterator(MachineBasicBlock::const_succ_iterator I) const {
   return Weights.begin() + index;
 }
 
+/// Return whether (physical) register "Reg" has been <def>ined and not <kill>ed
+/// as of just before "MI".
+/// 
+/// Search is localised to a neighborhood of
+/// Neighborhood instructions before (searching for defs or kills) and N
+/// instructions after (searching just for defs) MI.
+MachineBasicBlock::LivenessQueryResult
+MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
+                                           unsigned Reg, MachineInstr *MI,
+                                           unsigned Neighborhood) {
+  
+  unsigned N = Neighborhood;
+  MachineBasicBlock *MBB = MI->getParent();
+
+  // Start by searching backwards from MI, looking for kills, reads or defs.
+
+  MachineBasicBlock::iterator I(MI);
+  // If this is the first insn in the block, don't search backwards.
+  if (I != MBB->begin()) {
+    do {
+      --I;
+
+      MachineOperandIteratorBase::PhysRegInfo Analysis =
+        MIOperands(I).analyzePhysReg(Reg, TRI);
+
+      if (Analysis.Kills)
+        // Register killed, so isn't live.
+        return LQR_Dead;
+
+      else if (Analysis.DefinesOverlap || Analysis.ReadsOverlap)
+        // Defined or read without a previous kill - live.
+        return (Analysis.Defines || Analysis.Reads) ? 
+          LQR_Live : LQR_OverlappingLive;
+
+    } while (I != MBB->begin() && --N > 0);
+  }
+
+  // Did we get to the start of the block?
+  if (I == MBB->begin()) {
+    // If so, the register's state is definitely defined by the live-in state.
+    for (MCRegAliasIterator RAI(Reg, TRI, /*IncludeSelf=*/true);
+         RAI.isValid(); ++RAI) {
+      if (MBB->isLiveIn(*RAI))
+        return (*RAI == Reg) ? LQR_Live : LQR_OverlappingLive;
+    }
+
+    return LQR_Dead;
+  }
+
+  N = Neighborhood;
+
+  // Try searching forwards from MI, looking for reads or defs.
+  I = MachineBasicBlock::iterator(MI);
+  // If this is the last insn in the block, don't search forwards.
+  if (I != MBB->end()) {
+    for (++I; I != MBB->end() && N > 0; ++I, --N) {
+      MachineOperandIteratorBase::PhysRegInfo Analysis =
+        MIOperands(I).analyzePhysReg(Reg, TRI);
+
+      if (Analysis.ReadsOverlap)
+        // Used, therefore must have been live.
+        return (Analysis.Reads) ?
+          LQR_Live : LQR_OverlappingLive;
+
+      else if (Analysis.DefinesOverlap)
+        // Defined (but not read) therefore cannot have been live.
+        return LQR_Dead;
+    }
+  }
+
+  // At this point we have no idea of the liveness of the register.
+  return LQR_Unknown;
+}
+
 void llvm::WriteAsOperand(raw_ostream &OS, const MachineBasicBlock *MBB,
                           bool t) {
   OS << "BB#" << MBB->getNumber();
diff --git a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index c4dca2c..cd3f199 100644
--- a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -500,11 +500,10 @@ void MachineBlockPlacement::buildChain(
     assert(BB);
     assert(BlockToChain[BB] == &Chain);
     assert(*llvm::prior(Chain.end()) == BB);
-    MachineBasicBlock *BestSucc = 0;
 
     // Look for the best viable successor if there is one to place immediately
     // after this block.
-    BestSucc = selectBestSuccessor(BB, Chain, BlockFilter);
+    MachineBasicBlock *BestSucc = selectBestSuccessor(BB, Chain, BlockFilter);
 
     // If an immediate successor isn't available, look for the best viable
     // block among those we've identified as not violating the loop's CFG at
@@ -1014,7 +1013,8 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
   // exclusively on the loop info here so that we can align backedges in
   // unnatural CFGs and backedges that were introduced purely because of the
   // loop rotations done during this layout pass.
-  if (F.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+  if (F.getFunction()->getFnAttributes().
+        hasAttribute(Attributes::OptimizeForSize))
     return;
   unsigned Align = TLI->getPrefLoopAlignment();
   if (!Align)
diff --git a/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
index 0cc1af0..4479211 100644
--- a/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
@@ -38,7 +38,7 @@ getSumForBlock(const MachineBasicBlock *MBB, uint32_t &Scale) const {
   Scale = 1;
   for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
        E = MBB->succ_end(); I != E; ++I) {
-    uint32_t Weight = getEdgeWeight(MBB, *I);
+    uint32_t Weight = getEdgeWeight(MBB, I);
     Sum += Weight;
   }
 
@@ -53,22 +53,30 @@ getSumForBlock(const MachineBasicBlock *MBB, uint32_t &Scale) const {
   Sum = 0;
   for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
        E = MBB->succ_end(); I != E; ++I) {
-    uint32_t Weight = getEdgeWeight(MBB, *I);
+    uint32_t Weight = getEdgeWeight(MBB, I);
     Sum += Weight / Scale;
   }
   assert(Sum <= UINT32_MAX);
   return Sum;
 }
 
-uint32_t
-MachineBranchProbabilityInfo::getEdgeWeight(const MachineBasicBlock *Src,
-                                            const MachineBasicBlock *Dst) const {
+uint32_t MachineBranchProbabilityInfo::
+getEdgeWeight(const MachineBasicBlock *Src,
+              MachineBasicBlock::const_succ_iterator Dst) const {
   uint32_t Weight = Src->getSuccWeight(Dst);
   if (!Weight)
     return DEFAULT_WEIGHT;
   return Weight;
 }
 
+uint32_t MachineBranchProbabilityInfo::
+getEdgeWeight(const MachineBasicBlock *Src,
+              const MachineBasicBlock *Dst) const {
+  // This is a linear search. Try to use the const_succ_iterator version when
+  // possible.
+  return getEdgeWeight(Src, std::find(Src->succ_begin(), Src->succ_end(), Dst));
+}
+
 bool MachineBranchProbabilityInfo::isEdgeHot(MachineBasicBlock *Src,
                                              MachineBasicBlock *Dst) const {
   // Hot probability is at least 4/5 = 80%
@@ -82,7 +90,7 @@ MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const {
   MachineBasicBlock *MaxSucc = 0;
   for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
        E = MBB->succ_end(); I != E; ++I) {
-    uint32_t Weight = getEdgeWeight(MBB, *I);
+    uint32_t Weight = getEdgeWeight(MBB, I);
     if (Weight > MaxWeight) {
       MaxWeight = Weight;
       MaxSucc = *I;
diff --git a/contrib/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm/lib/CodeGen/MachineCSE.cpp
index 896461f..dbc41de 100644
--- a/contrib/llvm/lib/CodeGen/MachineCSE.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineCSE.cpp
@@ -63,8 +63,6 @@ namespace {
     virtual void releaseMemory() {
       ScopeMap.clear();
       Exps.clear();
-      AllocatableRegs.clear();
-      ReservedRegs.clear();
     }
 
   private:
@@ -78,8 +76,6 @@ namespace {
     ScopedHTType VNT;
     SmallVector<MachineInstr*, 64> Exps;
     unsigned CurrVN;
-    BitVector AllocatableRegs;
-    BitVector ReservedRegs;
 
     bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB);
     bool isPhysDefTriviallyDead(unsigned Reg,
@@ -88,7 +84,8 @@ namespace {
     bool hasLivePhysRegDefUses(const MachineInstr *MI,
                                const MachineBasicBlock *MBB,
                                SmallSet<unsigned,8> &PhysRefs,
-                               SmallVector<unsigned,2> &PhysDefs) const;
+                               SmallVector<unsigned,2> &PhysDefs,
+                               bool &PhysUseDef) const;
     bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
                           SmallSet<unsigned,8> &PhysRefs,
                           SmallVector<unsigned,2> &PhysDefs,
@@ -198,31 +195,52 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
 bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
                                        const MachineBasicBlock *MBB,
                                        SmallSet<unsigned,8> &PhysRefs,
-                                       SmallVector<unsigned,2> &PhysDefs) const{
-  MachineBasicBlock::const_iterator I = MI; I = llvm::next(I);
+                                       SmallVector<unsigned,2> &PhysDefs,
+                                       bool &PhysUseDef) const{
+  // First, add all uses to PhysRefs.
   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
     const MachineOperand &MO = MI->getOperand(i);
-    if (!MO.isReg())
+    if (!MO.isReg() || MO.isDef())
       continue;
     unsigned Reg = MO.getReg();
     if (!Reg)
       continue;
     if (TargetRegisterInfo::isVirtualRegister(Reg))
       continue;
-    // If the def is dead, it's ok. But the def may not marked "dead". That's
-    // common since this pass is run before livevariables. We can scan
-    // forward a few instructions and check if it is obviously dead.
-    if (MO.isDef() &&
-        (MO.isDead() || isPhysDefTriviallyDead(Reg, I, MBB->end())))
-      continue;
     // Reading constant physregs is ok.
     if (!MRI->isConstantPhysReg(Reg, *MBB->getParent()))
       for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
         PhysRefs.insert(*AI);
-    if (MO.isDef())
+  }
+
+  // Next, collect all defs into PhysDefs.  If any is already in PhysRefs
+  // (which currently contains only uses), set the PhysUseDef flag.
+  PhysUseDef = false;
+  MachineBasicBlock::const_iterator I = MI; I = llvm::next(I);
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg() || !MO.isDef())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (!Reg)
+      continue;
+    if (TargetRegisterInfo::isVirtualRegister(Reg))
+      continue;
+    // Check against PhysRefs even if the def is "dead".
+    if (PhysRefs.count(Reg))
+      PhysUseDef = true;
+    // If the def is dead, it's ok. But the def may not marked "dead". That's
+    // common since this pass is run before livevariables. We can scan
+    // forward a few instructions and check if it is obviously dead.
+    if (!MO.isDead() && !isPhysDefTriviallyDead(Reg, I, MBB->end()))
       PhysDefs.push_back(Reg);
   }
 
+  // Finally, add all defs to PhysRefs as well.
+  for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i)
+    for (MCRegAliasIterator AI(PhysDefs[i], TRI, true); AI.isValid(); ++AI)
+      PhysRefs.insert(*AI);
+
   return !PhysRefs.empty();
 }
 
@@ -242,7 +260,7 @@ bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
       return false;
 
     for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) {
-      if (AllocatableRegs.test(PhysDefs[i]) || ReservedRegs.test(PhysDefs[i]))
+      if (MRI->isAllocatable(PhysDefs[i]) || MRI->isReserved(PhysDefs[i]))
         // Avoid extending live range of physical registers if they are
         //allocatable or reserved.
         return false;
@@ -411,8 +429,8 @@ void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
   DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
   DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB);
   assert(SI != ScopeMap.end());
-  ScopeMap.erase(SI);
   delete SI->second;
+  ScopeMap.erase(SI);
 }
 
 bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
@@ -463,16 +481,22 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
     bool CrossMBBPhysDef = false;
     SmallSet<unsigned, 8> PhysRefs;
     SmallVector<unsigned, 2> PhysDefs;
-    if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs, PhysDefs)) {
+    bool PhysUseDef = false;
+    if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs,
+                                          PhysDefs, PhysUseDef)) {
       FoundCSE = false;
 
       // ... Unless the CS is local or is in the sole predecessor block
       // and it also defines the physical register which is not clobbered
       // in between and the physical register uses were not clobbered.
-      unsigned CSVN = VNT.lookup(MI);
-      MachineInstr *CSMI = Exps[CSVN];
-      if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
-        FoundCSE = true;
+      // This can never be the case if the instruction both uses and
+      // defines the same physical register, which was detected above.
+      if (!PhysUseDef) {
+        unsigned CSVN = VNT.lookup(MI);
+        MachineInstr *CSMI = Exps[CSVN];
+        if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
+          FoundCSE = true;
+      }
     }
 
     if (!FoundCSE) {
@@ -635,7 +659,5 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
   MRI = &MF.getRegInfo();
   AA = &getAnalysis<AliasAnalysis>();
   DT = &getAnalysis<MachineDominatorTree>();
-  AllocatableRegs = TRI->getAllocatableSet(MF);
-  ReservedRegs = TRI->getReservedRegs(MF);
   return PerformCSE(DT->getRootNode());
 }
diff --git a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index bac3aa2..4a79328 100644
--- a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -16,6 +16,7 @@
 #include "llvm/Pass.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -32,7 +33,7 @@ STATISTIC(NumDeletes, "Number of dead copies deleted");
 namespace {
   class MachineCopyPropagation : public MachineFunctionPass {
     const TargetRegisterInfo *TRI;
-    BitVector ReservedRegs;
+    MachineRegisterInfo *MRI;
 
   public:
     static char ID; // Pass identification, replacement for typeid
@@ -146,8 +147,8 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
       DenseMap<unsigned, MachineInstr*>::iterator CI = AvailCopyMap.find(Src);
       if (CI != AvailCopyMap.end()) {
         MachineInstr *CopyMI = CI->second;
-        if (!ReservedRegs.test(Def) &&
-            (!ReservedRegs.test(Src) || NoInterveningSideEffect(CopyMI, MI)) &&
+        if (!MRI->isReserved(Def) &&
+            (!MRI->isReserved(Src) || NoInterveningSideEffect(CopyMI, MI)) &&
             isNopCopy(CopyMI, Def, Src, TRI)) {
           // The two copies cancel out and the source of the first copy
           // hasn't been overridden, eliminate the second one. e.g.
@@ -259,7 +260,7 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
            DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end();
            DI != DE; ++DI) {
         unsigned Reg = (*DI)->getOperand(0).getReg();
-        if (ReservedRegs.test(Reg) || !MaskMO.clobbersPhysReg(Reg))
+        if (MRI->isReserved(Reg) || !MaskMO.clobbersPhysReg(Reg))
           continue;
         (*DI)->eraseFromParent();
         Changed = true;
@@ -296,7 +297,7 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
     for (SmallSetVector<MachineInstr*, 8>::iterator
            DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end();
          DI != DE; ++DI) {
-      if (!ReservedRegs.test((*DI)->getOperand(0).getReg())) {
+      if (!MRI->isReserved((*DI)->getOperand(0).getReg())) {
         (*DI)->eraseFromParent();
         Changed = true;
         ++NumDeletes;
@@ -311,7 +312,7 @@ bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) {
   bool Changed = false;
 
   TRI = MF.getTarget().getRegisterInfo();
-  ReservedRegs = TRI->getReservedRegs(MF);
+  MRI = &MF.getRegInfo();
 
   for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
     Changed |= CopyPropagateBlock(*I);
diff --git a/contrib/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm/lib/CodeGen/MachineFunction.cpp
index d4aede8a..91d5211 100644
--- a/contrib/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineFunction.cpp
@@ -28,7 +28,7 @@
 #include "llvm/MC/MCContext.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetFrameLowering.h"
@@ -59,13 +59,13 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
     RegInfo = 0;
   MFInfo = 0;
   FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameLowering());
-  if (Fn->hasFnAttr(Attribute::StackAlignment))
-    FrameInfo->ensureMaxAlignment(Attribute::getStackAlignmentFromAttrs(
-        Fn->getAttributes().getFnAttributes()));
-  ConstantPool = new (Allocator) MachineConstantPool(TM.getTargetData());
+  if (Fn->getFnAttributes().hasAttribute(Attributes::StackAlignment))
+    FrameInfo->ensureMaxAlignment(Fn->getAttributes().
+                                  getFnAttributes().getStackAlignment());
+  ConstantPool = new (Allocator) MachineConstantPool(TM.getDataLayout());
   Alignment = TM.getTargetLowering()->getMinFunctionAlignment();
   // FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn.
-  if (!Fn->hasFnAttr(Attribute::OptimizeForSize))
+  if (!Fn->getFnAttributes().hasAttribute(Attributes::OptimizeForSize))
     Alignment = std::max(Alignment,
                          TM.getTargetLowering()->getPrefFunctionAlignment());
   FunctionNumber = FunctionNum;
@@ -284,12 +284,19 @@ MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin,
   return std::make_pair(Result, Result + Num);
 }
 
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void MachineFunction::dump() const {
   print(dbgs());
 }
+#endif
+
+StringRef MachineFunction::getName() const {
+  assert(getFunction() && "No function!");
+  return getFunction()->getName();
+}
 
 void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const {
-  OS << "# Machine code for function " << Fn->getName() << ": ";
+  OS << "# Machine code for function " << getName() << ": ";
   if (RegInfo) {
     OS << (RegInfo->isSSA() ? "SSA" : "Post SSA");
     if (!RegInfo->tracksLiveness())
@@ -334,7 +341,7 @@ void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const {
     BB->print(OS, Indexes);
   }
 
-  OS << "\n# End machine code for function " << Fn->getName() << ".\n\n";
+  OS << "\n# End machine code for function " << getName() << ".\n\n";
 }
 
 namespace llvm {
@@ -344,7 +351,7 @@ namespace llvm {
   DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
 
     static std::string getGraphName(const MachineFunction *F) {
-      return "CFG for '" + F->getFunction()->getName().str() + "' function";
+      return "CFG for '" + F->getName().str() + "' function";
     }
 
     std::string getNodeLabel(const MachineBasicBlock *Node,
@@ -377,7 +384,7 @@ namespace llvm {
 void MachineFunction::viewCFG() const
 {
 #ifndef NDEBUG
-  ViewGraph(this, "mf" + getFunction()->getName());
+  ViewGraph(this, "mf" + getName());
 #else
   errs() << "MachineFunction::viewCFG is only available in debug builds on "
          << "systems with Graphviz or gv!\n";
@@ -387,7 +394,7 @@ void MachineFunction::viewCFG() const
 void MachineFunction::viewCFGOnly() const
 {
 #ifndef NDEBUG
-  ViewGraph(this, "mf" + getFunction()->getName(), true);
+  ViewGraph(this, "mf" + getName(), true);
 #else
   errs() << "MachineFunction::viewCFGOnly is only available in debug builds on "
          << "systems with Graphviz or gv!\n";
@@ -453,7 +460,9 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
   unsigned StackAlign = TFI.getStackAlignment();
   unsigned Align = MinAlign(SPOffset, StackAlign);
   Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable,
-                                              /*isSS*/false, false));
+                                              /*isSS*/   false,
+                                              /*NeedSP*/ false,
+                                              /*Alloca*/ 0));
   return -++NumFixedObjects;
 }
 
@@ -525,16 +534,18 @@ void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{
   }
 }
 
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void MachineFrameInfo::dump(const MachineFunction &MF) const {
   print(MF, dbgs());
 }
+#endif
 
 //===----------------------------------------------------------------------===//
 //  MachineJumpTableInfo implementation
 //===----------------------------------------------------------------------===//
 
 /// getEntrySize - Return the size of each entry in the jump table.
-unsigned MachineJumpTableInfo::getEntrySize(const TargetData &TD) const {
+unsigned MachineJumpTableInfo::getEntrySize(const DataLayout &TD) const {
   // The size of a jump table entry is 4 bytes unless the entry is just the
   // address of a block, in which case it is the pointer size.
   switch (getEntryKind()) {
@@ -553,7 +564,7 @@ unsigned MachineJumpTableInfo::getEntrySize(const TargetData &TD) const {
 }
 
 /// getEntryAlignment - Return the alignment of each entry in the jump table.
-unsigned MachineJumpTableInfo::getEntryAlignment(const TargetData &TD) const {
+unsigned MachineJumpTableInfo::getEntryAlignment(const DataLayout &TD) const {
   // The alignment of a jump table entry is the alignment of int32 unless the
   // entry is just the address of a block, in which case it is the pointer
   // alignment.
@@ -622,7 +633,9 @@ void MachineJumpTableInfo::print(raw_ostream &OS) const {
   OS << '\n';
 }
 
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void MachineJumpTableInfo::dump() const { print(dbgs()); }
+#endif
 
 
 //===----------------------------------------------------------------------===//
@@ -657,7 +670,7 @@ MachineConstantPool::~MachineConstantPool() {
 /// CanShareConstantPoolEntry - Test whether the given two constants
 /// can be allocated the same constant pool entry.
 static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B,
-                                      const TargetData *TD) {
+                                      const DataLayout *TD) {
   // Handle the trivial case quickly.
   if (A == B) return true;
 
@@ -681,7 +694,7 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B,
   // Try constant folding a bitcast of both instructions to an integer.  If we
   // get two identical ConstantInt's, then we are good to share them.  We use
   // the constant folding APIs to do this so that we get the benefit of
-  // TargetData.
+  // DataLayout.
   if (isa<PointerType>(A->getType()))
     A = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy,
                                  const_cast<Constant*>(A), TD);
@@ -749,10 +762,12 @@ void MachineConstantPool::print(raw_ostream &OS) const {
     if (Constants[i].isMachineConstantPoolEntry())
       Constants[i].Val.MachineCPVal->print(OS);
     else
-      OS << *(Value*)Constants[i].Val.ConstVal;
+      OS << *(const Value*)Constants[i].Val.ConstVal;
     OS << ", align=" << Constants[i].getAlignment();
     OS << "\n";
   }
 }
 
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void MachineConstantPool::dump() const { print(dbgs()); }
+#endif
diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
index 0102ac7..ed94efb 100644
--- a/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
@@ -51,7 +51,7 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass {
 char MachineFunctionPrinterPass::ID = 0;
 }
 
-char &MachineFunctionPrinterPassID = MachineFunctionPrinterPass::ID;
+char &llvm::MachineFunctionPrinterPassID = MachineFunctionPrinterPass::ID;
 INITIALIZE_PASS(MachineFunctionPrinterPass, "print-machineinstrs",
                 "Machine Function Printer", false, false)
 
diff --git a/contrib/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
index b166849..ce8d520 100644
--- a/contrib/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
@@ -111,6 +111,7 @@ void MachineOperand::setIsDef(bool Val) {
 /// the specified value.  If an operand is known to be an immediate already,
 /// the setImm method should be used.
 void MachineOperand::ChangeToImmediate(int64_t ImmVal) {
+  assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm");
   // If this operand is currently a register operand, and if this is in a
   // function, deregister the operand from the register's use/def list.
   if (isReg() && isOnRegUseList())
@@ -136,7 +137,8 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp,
         RegInfo = &MF->getRegInfo();
   // If this operand is already a register operand, remove it from the
   // register's use/def lists.
-  if (RegInfo && isReg())
+  bool WasReg = isReg();
+  if (RegInfo && WasReg)
     RegInfo->removeRegOperandFromUseList(this);
 
   // Change this to a register and set the reg#.
@@ -153,6 +155,9 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp,
   IsDebug = isDebug;
   // Ensure isOnRegUseList() returns false.
   Contents.Reg.Prev = 0;
+  // Preserve the tie when the operand was already a register.
+  if (!WasReg)
+    TiedTo = 0;
 
   // If this operand is embedded in a function, add the operand to the
   // register's use/def list.
@@ -193,7 +198,8 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
     return !strcmp(getSymbolName(), Other.getSymbolName()) &&
            getOffset() == Other.getOffset();
   case MachineOperand::MO_BlockAddress:
-    return getBlockAddress() == Other.getBlockAddress();
+    return getBlockAddress() == Other.getBlockAddress() &&
+           getOffset() == Other.getOffset();
   case MO_RegisterMask:
     return getRegMask() == Other.getRegMask();
   case MachineOperand::MO_MCSymbol:
@@ -208,8 +214,8 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
 hash_code llvm::hash_value(const MachineOperand &MO) {
   switch (MO.getType()) {
   case MachineOperand::MO_Register:
-    return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getReg(),
-                        MO.getSubReg(), MO.isDef());
+    // Register operands don't have target flags.
+    return hash_combine(MO.getType(), MO.getReg(), MO.getSubReg(), MO.isDef());
   case MachineOperand::MO_Immediate:
     return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getImm());
   case MachineOperand::MO_CImmediate:
@@ -234,7 +240,7 @@ hash_code llvm::hash_value(const MachineOperand &MO) {
                         MO.getOffset());
   case MachineOperand::MO_BlockAddress:
     return hash_combine(MO.getType(), MO.getTargetFlags(),
-                        MO.getBlockAddress());
+                        MO.getBlockAddress(), MO.getOffset());
   case MachineOperand::MO_RegisterMask:
     return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getRegMask());
   case MachineOperand::MO_Metadata:
@@ -262,7 +268,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
     OS << PrintReg(getReg(), TRI, getSubReg());
 
     if (isDef() || isKill() || isDead() || isImplicit() || isUndef() ||
-        isInternalRead() || isEarlyClobber()) {
+        isInternalRead() || isEarlyClobber() || isTied()) {
       OS << '<';
       bool NeedComma = false;
       if (isDef()) {
@@ -282,27 +288,32 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
           NeedComma = true;
       }
 
-      if (isKill() || isDead() || (isUndef() && isUse()) || isInternalRead()) {
+      if (isKill()) {
         if (NeedComma) OS << ',';
-        NeedComma = false;
-        if (isKill()) {
-          OS << "kill";
-          NeedComma = true;
-        }
-        if (isDead()) {
-          OS << "dead";
-          NeedComma = true;
-        }
-        if (isUndef() && isUse()) {
-          if (NeedComma) OS << ',';
-          OS << "undef";
-          NeedComma = true;
-        }
-        if (isInternalRead()) {
-          if (NeedComma) OS << ',';
-          OS << "internal";
-          NeedComma = true;
-        }
+        OS << "kill";
+        NeedComma = true;
+      }
+      if (isDead()) {
+        if (NeedComma) OS << ',';
+        OS << "dead";
+        NeedComma = true;
+      }
+      if (isUndef() && isUse()) {
+        if (NeedComma) OS << ',';
+        OS << "undef";
+        NeedComma = true;
+      }
+      if (isInternalRead()) {
+        if (NeedComma) OS << ',';
+        OS << "internal";
+        NeedComma = true;
+      }
+      if (isTied()) {
+        if (NeedComma) OS << ',';
+        OS << "tied";
+        if (TiedTo != 15)
+          OS << unsigned(TiedTo - 1);
+        NeedComma = true;
       }
       OS << '>';
     }
@@ -352,6 +363,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
   case MachineOperand::MO_BlockAddress:
     OS << '<';
     WriteAsOperand(OS, getBlockAddress(), /*PrintType=*/false);
+    if (getOffset()) OS << "+" << getOffset();
     OS << '>';
     break;
   case MachineOperand::MO_RegisterMask:
@@ -528,20 +540,6 @@ void MachineInstr::addImplicitDefUseOperands() {
 /// MachineInstr ctor - This constructor creates a MachineInstr and adds the
 /// implicit operands. It reserves space for the number of operands specified by
 /// the MCInstrDesc.
-MachineInstr::MachineInstr(const MCInstrDesc &tid, bool NoImp)
-  : MCID(&tid), Flags(0), AsmPrinterFlags(0),
-    NumMemRefs(0), MemRefs(0), Parent(0) {
-  unsigned NumImplicitOps = 0;
-  if (!NoImp)
-    NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
-  Operands.reserve(NumImplicitOps + MCID->getNumOperands());
-  if (!NoImp)
-    addImplicitDefUseOperands();
-  // Make sure that we get added to a machine basicblock
-  LeakDetector::addGarbageObject(this);
-}
-
-/// MachineInstr ctor - As above, but with a DebugLoc.
 MachineInstr::MachineInstr(const MCInstrDesc &tid, const DebugLoc dl,
                            bool NoImp)
   : MCID(&tid), Flags(0), AsmPrinterFlags(0),
@@ -559,21 +557,6 @@ MachineInstr::MachineInstr(const MCInstrDesc &tid, const DebugLoc dl,
 /// MachineInstr ctor - Work exactly the same as the ctor two above, except
 /// that the MachineInstr is created and added to the end of the specified
 /// basic block.
-MachineInstr::MachineInstr(MachineBasicBlock *MBB, const MCInstrDesc &tid)
-  : MCID(&tid), Flags(0), AsmPrinterFlags(0),
-    NumMemRefs(0), MemRefs(0), Parent(0) {
-  assert(MBB && "Cannot use inserting ctor with null basic block!");
-  unsigned NumImplicitOps =
-    MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
-  Operands.reserve(NumImplicitOps + MCID->getNumOperands());
-  addImplicitDefUseOperands();
-  // Make sure that we get added to a machine basicblock
-  LeakDetector::addGarbageObject(this);
-  MBB->push_back(this);  // Add instruction to end of basic block!
-}
-
-/// MachineInstr ctor - As above, but with a DebugLoc.
-///
 MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl,
                            const MCInstrDesc &tid)
   : MCID(&tid), Flags(0), AsmPrinterFlags(0),
@@ -673,6 +656,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) {
   if (!isImpReg && !isInlineAsm()) {
     while (OpNo && Operands[OpNo-1].isReg() && Operands[OpNo-1].isImplicit()) {
       --OpNo;
+      assert(!Operands[OpNo].isTied() && "Cannot move tied operands");
       if (RegInfo)
         RegInfo->removeRegOperandFromUseList(&Operands[OpNo]);
     }
@@ -708,12 +692,25 @@ void MachineInstr::addOperand(const MachineOperand &Op) {
   if (Operands[OpNo].isReg()) {
     // Ensure isOnRegUseList() returns false, regardless of Op's status.
     Operands[OpNo].Contents.Reg.Prev = 0;
+    // Ignore existing ties. This is not a property that can be copied.
+    Operands[OpNo].TiedTo = 0;
     // Add the new operand to RegInfo.
     if (RegInfo)
       RegInfo->addRegOperandToUseList(&Operands[OpNo]);
-    // If the register operand is flagged as early, mark the operand as such.
-    if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1)
-      Operands[OpNo].setIsEarlyClobber(true);
+    // The MCID operand information isn't accurate until we start adding
+    // explicit operands. The implicit operands are added first, then the
+    // explicits are inserted before them.
+    if (!isImpReg) {
+      // Tie uses to defs as indicated in MCInstrDesc.
+      if (Operands[OpNo].isUse()) {
+        int DefIdx = MCID->getOperandConstraint(OpNo, MCOI::TIED_TO);
+        if (DefIdx != -1)
+          tieOperands(DefIdx, OpNo);
+      }
+      // If the register operand is flagged as early, mark the operand as such.
+      if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1)
+        Operands[OpNo].setIsEarlyClobber(true);
+    }
   }
 
   // Re-add all the implicit ops.
@@ -730,6 +727,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) {
 ///
 void MachineInstr::RemoveOperand(unsigned OpNo) {
   assert(OpNo < Operands.size() && "Invalid operand number");
+  untieRegOperand(OpNo);
   MachineRegisterInfo *RegInfo = getRegInfo();
 
   // Special case removing the last one.
@@ -752,6 +750,13 @@ void MachineInstr::RemoveOperand(unsigned OpNo) {
     }
   }
 
+#ifndef NDEBUG
+  // Moving tied operands would break the ties.
+  for (unsigned i = OpNo + 1, e = Operands.size(); i != e; ++i)
+    if (Operands[i].isReg())
+      assert(!Operands[i].isTied() && "Cannot move tied operands");
+#endif
+
   Operands.erase(Operands.begin()+OpNo);
 
   if (RegInfo) {
@@ -935,6 +940,12 @@ bool MachineInstr::isStackAligningInlineAsm() const {
   return false;
 }
 
+InlineAsm::AsmDialect MachineInstr::getInlineAsmDialect() const {
+  assert(isInlineAsm() && "getInlineAsmDialect() only works for inline asms!");
+  unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+  return InlineAsm::AsmDialect((ExtraInfo & InlineAsm::Extra_AsmDialect) != 0);
+}
+
 int MachineInstr::findInlineAsmFlagIdx(unsigned OpIdx,
                                        unsigned *GroupNo) const {
   assert(isInlineAsm() && "Expected an inline asm instruction");
@@ -1004,9 +1015,10 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx,
 unsigned MachineInstr::getBundleSize() const {
   assert(isBundle() && "Expecting a bundle");
 
-  MachineBasicBlock::const_instr_iterator I = *this;
+  const MachineBasicBlock *MBB = getParent();
+  MachineBasicBlock::const_instr_iterator I = *this, E = MBB->instr_end();
   unsigned Size = 0;
-  while ((++I)->isInsideBundle()) {
+  while ((++I != E) && I->isInsideBundle()) {
     ++Size;
   }
   assert(Size > 1 && "Malformed bundle");
@@ -1114,107 +1126,99 @@ int MachineInstr::findFirstPredOperandIdx() const {
   return -1;
 }
 
-/// isRegTiedToUseOperand - Given the index of a register def operand,
-/// check if the register def is tied to a source operand, due to either
-/// two-address elimination or inline assembly constraints. Returns the
-/// first tied use operand index by reference is UseOpIdx is not null.
-bool MachineInstr::
-isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const {
-  if (isInlineAsm()) {
-    assert(DefOpIdx > InlineAsm::MIOp_FirstOperand);
-    const MachineOperand &MO = getOperand(DefOpIdx);
-    if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0)
-      return false;
-    // Determine the actual operand index that corresponds to this index.
-    unsigned DefNo = 0;
-    int FlagIdx = findInlineAsmFlagIdx(DefOpIdx, &DefNo);
-    if (FlagIdx < 0)
-      return false;
+// MachineOperand::TiedTo is 4 bits wide.
+const unsigned TiedMax = 15;
 
-    // Which part of the group is DefOpIdx?
-    unsigned DefPart = DefOpIdx - (FlagIdx + 1);
-
-    for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands();
-         i != e; ++i) {
-      const MachineOperand &FMO = getOperand(i);
-      if (!FMO.isImm())
-        continue;
-      if (i+1 >= e || !getOperand(i+1).isReg() || !getOperand(i+1).isUse())
-        continue;
-      unsigned Idx;
-      if (InlineAsm::isUseOperandTiedToDef(FMO.getImm(), Idx) &&
-          Idx == DefNo) {
-        if (UseOpIdx)
-          *UseOpIdx = (unsigned)i + 1 + DefPart;
-        return true;
-      }
-    }
-    return false;
+/// tieOperands - Mark operands at DefIdx and UseIdx as tied to each other.
+///
+/// Use and def operands can be tied together, indicated by a non-zero TiedTo
+/// field. TiedTo can have these values:
+///
+/// 0:              Operand is not tied to anything.
+/// 1 to TiedMax-1: Tied to getOperand(TiedTo-1).
+/// TiedMax:        Tied to an operand >= TiedMax-1.
+///
+/// The tied def must be one of the first TiedMax operands on a normal
+/// instruction. INLINEASM instructions allow more tied defs.
+///
+void MachineInstr::tieOperands(unsigned DefIdx, unsigned UseIdx) {
+  MachineOperand &DefMO = getOperand(DefIdx);
+  MachineOperand &UseMO = getOperand(UseIdx);
+  assert(DefMO.isDef() && "DefIdx must be a def operand");
+  assert(UseMO.isUse() && "UseIdx must be a use operand");
+  assert(!DefMO.isTied() && "Def is already tied to another use");
+  assert(!UseMO.isTied() && "Use is already tied to another def");
+
+  if (DefIdx < TiedMax)
+    UseMO.TiedTo = DefIdx + 1;
+  else {
+    // Inline asm can use the group descriptors to find tied operands, but on
+    // normal instruction, the tied def must be within the first TiedMax
+    // operands.
+    assert(isInlineAsm() && "DefIdx out of range");
+    UseMO.TiedTo = TiedMax;
   }
 
-  assert(getOperand(DefOpIdx).isDef() && "DefOpIdx is not a def!");
-  const MCInstrDesc &MCID = getDesc();
-  for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) {
-    const MachineOperand &MO = getOperand(i);
-    if (MO.isReg() && MO.isUse() &&
-        MCID.getOperandConstraint(i, MCOI::TIED_TO) == (int)DefOpIdx) {
-      if (UseOpIdx)
-        *UseOpIdx = (unsigned)i;
-      return true;
-    }
-  }
-  return false;
+  // UseIdx can be out of range, we'll search for it in findTiedOperandIdx().
+  DefMO.TiedTo = std::min(UseIdx + 1, TiedMax);
 }
 
-/// isRegTiedToDefOperand - Return true if the operand of the specified index
-/// is a register use and it is tied to an def operand. It also returns the def
-/// operand index by reference.
-bool MachineInstr::
-isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const {
-  if (isInlineAsm()) {
-    const MachineOperand &MO = getOperand(UseOpIdx);
-    if (!MO.isReg() || !MO.isUse() || MO.getReg() == 0)
-      return false;
+/// Given the index of a tied register operand, find the operand it is tied to.
+/// Defs are tied to uses and vice versa. Returns the index of the tied operand
+/// which must exist.
+unsigned MachineInstr::findTiedOperandIdx(unsigned OpIdx) const {
+  const MachineOperand &MO = getOperand(OpIdx);
+  assert(MO.isTied() && "Operand isn't tied");
 
-    // Find the flag operand corresponding to UseOpIdx
-    int FlagIdx = findInlineAsmFlagIdx(UseOpIdx);
-    if (FlagIdx < 0)
-      return false;
+  // Normally TiedTo is in range.
+  if (MO.TiedTo < TiedMax)
+    return MO.TiedTo - 1;
 
-    const MachineOperand &UFMO = getOperand(FlagIdx);
-    unsigned DefNo;
-    if (InlineAsm::isUseOperandTiedToDef(UFMO.getImm(), DefNo)) {
-      if (!DefOpIdx)
-        return true;
-
-      unsigned DefIdx = InlineAsm::MIOp_FirstOperand;
-      // Remember to adjust the index. First operand is asm string, second is
-      // the HasSideEffects and AlignStack bits, then there is a flag for each.
-      while (DefNo) {
-        const MachineOperand &FMO = getOperand(DefIdx);
-        assert(FMO.isImm());
-        // Skip over this def.
-        DefIdx += InlineAsm::getNumOperandRegisters(FMO.getImm()) + 1;
-        --DefNo;
-      }
-      *DefOpIdx = DefIdx + UseOpIdx - FlagIdx;
-      return true;
+  // Uses on normal instructions can be out of range.
+  if (!isInlineAsm()) {
+    // Normal tied defs must be in the 0..TiedMax-1 range.
+    if (MO.isUse())
+      return TiedMax - 1;
+    // MO is a def. Search for the tied use.
+    for (unsigned i = TiedMax - 1, e = getNumOperands(); i != e; ++i) {
+      const MachineOperand &UseMO = getOperand(i);
+      if (UseMO.isReg() && UseMO.isUse() && UseMO.TiedTo == OpIdx + 1)
+        return i;
     }
-    return false;
+    llvm_unreachable("Can't find tied use");
   }
 
-  const MCInstrDesc &MCID = getDesc();
-  if (UseOpIdx >= MCID.getNumOperands())
-    return false;
-  const MachineOperand &MO = getOperand(UseOpIdx);
-  if (!MO.isReg() || !MO.isUse())
-    return false;
-  int DefIdx = MCID.getOperandConstraint(UseOpIdx, MCOI::TIED_TO);
-  if (DefIdx == -1)
-    return false;
-  if (DefOpIdx)
-    *DefOpIdx = (unsigned)DefIdx;
-  return true;
+  // Now deal with inline asm by parsing the operand group descriptor flags.
+  // Find the beginning of each operand group.
+  SmallVector<unsigned, 8> GroupIdx;
+  unsigned OpIdxGroup = ~0u;
+  unsigned NumOps;
+  for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands(); i < e;
+       i += NumOps) {
+    const MachineOperand &FlagMO = getOperand(i);
+    assert(FlagMO.isImm() && "Invalid tied operand on inline asm");
+    unsigned CurGroup = GroupIdx.size();
+    GroupIdx.push_back(i);
+    NumOps = 1 + InlineAsm::getNumOperandRegisters(FlagMO.getImm());
+    // OpIdx belongs to this operand group.
+    if (OpIdx > i && OpIdx < i + NumOps)
+      OpIdxGroup = CurGroup;
+    unsigned TiedGroup;
+    if (!InlineAsm::isUseOperandTiedToDef(FlagMO.getImm(), TiedGroup))
+      continue;
+    // Operands in this group are tied to operands in TiedGroup which must be
+    // earlier. Find the number of operands between the two groups.
+    unsigned Delta = i - GroupIdx[TiedGroup];
+
+    // OpIdx is a use tied to TiedGroup.
+    if (OpIdxGroup == CurGroup)
+      return OpIdx - Delta;
+
+    // OpIdx is a def tied to this use group.
+    if (OpIdxGroup == TiedGroup)
+      return OpIdx + Delta;
+  }
+  llvm_unreachable("Invalid tied operand on inline asm");
 }
 
 /// clearKillInfo - Clears kill flags on all operands.
@@ -1292,7 +1296,12 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII,
                                 AliasAnalysis *AA,
                                 bool &SawStore) const {
   // Ignore stuff that we obviously can't move.
-  if (mayStore() || isCall()) {
+  //
+  // Treat volatile loads as stores. This is not strictly necessary for
+  // volatiles, but it is required for atomic loads. It is not allowed to move
+  // a load across an atomic load with Ordering > Monotonic.
+  if (mayStore() || isCall() ||
+      (mayLoad() && hasOrderedMemoryRef())) {
     SawStore = true;
     return false;
   }
@@ -1308,8 +1317,8 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII,
   // load.
   if (mayLoad() && !isInvariantLoad(AA))
     // Otherwise, this is a real load.  If there is a store between the load and
-    // end of block, or if the load is volatile, we can't move it.
-    return !SawStore && !hasVolatileMemoryRef();
+    // end of block, we can't move it.
+    return !SawStore;
 
   return true;
 }
@@ -1340,11 +1349,11 @@ bool MachineInstr::isSafeToReMat(const TargetInstrInfo *TII,
   return true;
 }
 
-/// hasVolatileMemoryRef - Return true if this instruction may have a
-/// volatile memory reference, or if the information describing the
-/// memory reference is not available. Return false if it is known to
-/// have no volatile memory references.
-bool MachineInstr::hasVolatileMemoryRef() const {
+/// hasOrderedMemoryRef - Return true if this instruction may have an ordered
+/// or volatile memory reference, or if the information describing the memory
+/// reference is not available. Return false if it is known to have no ordered
+/// memory references.
+bool MachineInstr::hasOrderedMemoryRef() const {
   // An instruction known never to access memory won't have a volatile access.
   if (!mayStore() &&
       !mayLoad() &&
@@ -1357,9 +1366,9 @@ bool MachineInstr::hasVolatileMemoryRef() const {
   if (memoperands_empty())
     return true;
 
-  // Check the memory reference information for volatile references.
+  // Check the memory reference information for ordered references.
   for (mmo_iterator I = memoperands_begin(), E = memoperands_end(); I != E; ++I)
-    if ((*I)->isVolatile())
+    if (!(*I)->isUnordered())
       return true;
 
   return false;
@@ -1461,7 +1470,9 @@ void MachineInstr::copyImplicitOps(const MachineInstr *MI) {
 }
 
 void MachineInstr::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   dbgs() << "  " << *this;
+#endif
 }
 
 static void printDebugLoc(DebugLoc DL, const MachineFunction *MF,
@@ -1540,6 +1551,10 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
       OS << " [sideeffect]";
     if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
       OS << " [alignstack]";
+    if (getInlineAsmDialect() == InlineAsm::AD_ATT)
+      OS << " [attdialect]";
+    if (getInlineAsmDialect() == InlineAsm::AD_Intel)
+      OS << " [inteldialect]";
 
     StartOp = AsmDescOp = InlineAsm::MIOp_FirstOperand;
     FirstOp = false;
diff --git a/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp b/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp
index b7de7bf..1f7fbfc 100644
--- a/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp
@@ -109,10 +109,10 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
   MachineInstrBuilder MIB = BuildMI(MBB, FirstMI, FirstMI->getDebugLoc(),
                                     TII->get(TargetOpcode::BUNDLE));
 
-  SmallVector<unsigned, 8> LocalDefs;
-  SmallSet<unsigned, 8> LocalDefSet;
+  SmallVector<unsigned, 32> LocalDefs;
+  SmallSet<unsigned, 32> LocalDefSet;
   SmallSet<unsigned, 8> DeadDefSet;
-  SmallSet<unsigned, 8> KilledDefSet;
+  SmallSet<unsigned, 16> KilledDefSet;
   SmallVector<unsigned, 8> ExternUses;
   SmallSet<unsigned, 8> ExternUseSet;
   SmallSet<unsigned, 8> KilledUseSet;
@@ -181,7 +181,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
     Defs.clear();
   }
 
-  SmallSet<unsigned, 8> Added;
+  SmallSet<unsigned, 32> Added;
   for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
     unsigned Reg = LocalDefs[i];
     if (Added.insert(Reg)) {
@@ -248,10 +248,10 @@ bool llvm::finalizeBundles(MachineFunction &MF) {
 // MachineOperand iterator
 //===----------------------------------------------------------------------===//
 
-MachineOperandIteratorBase::RegInfo
+MachineOperandIteratorBase::VirtRegInfo
 MachineOperandIteratorBase::analyzeVirtReg(unsigned Reg,
                     SmallVectorImpl<std::pair<MachineInstr*, unsigned> > *Ops) {
-  RegInfo RI = { false, false, false };
+  VirtRegInfo RI = { false, false, false };
   for(; isValid(); ++*this) {
     MachineOperand &MO = deref();
     if (!MO.isReg() || MO.getReg() != Reg)
@@ -276,3 +276,53 @@ MachineOperandIteratorBase::analyzeVirtReg(unsigned Reg,
   }
   return RI;
 }
+
+MachineOperandIteratorBase::PhysRegInfo
+MachineOperandIteratorBase::analyzePhysReg(unsigned Reg,
+                                           const TargetRegisterInfo *TRI) {
+  bool AllDefsDead = true;
+  PhysRegInfo PRI = {false, false, false, false, false, false, false};
+
+  assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+         "analyzePhysReg not given a physical register!");
+  for (; isValid(); ++*this) {
+    MachineOperand &MO = deref();
+
+    if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
+      PRI.Clobbers = true;    // Regmask clobbers Reg.
+
+    if (!MO.isReg())
+      continue;
+
+    unsigned MOReg = MO.getReg();
+    if (!MOReg || !TargetRegisterInfo::isPhysicalRegister(MOReg))
+      continue;
+
+    bool IsRegOrSuperReg = MOReg == Reg || TRI->isSubRegister(MOReg, Reg);
+    bool IsRegOrOverlapping = MOReg == Reg || TRI->regsOverlap(MOReg, Reg);
+
+    if (IsRegOrSuperReg && MO.readsReg()) {
+      // Reg or a super-reg is read, and perhaps killed also.
+      PRI.Reads = true;
+      PRI.Kills = MO.isKill();
+    } if (IsRegOrOverlapping && MO.readsReg()) {
+      PRI.ReadsOverlap = true;// Reg or an overlapping register is read.
+    }
+
+    if (!MO.isDef())
+      continue;
+
+    if (IsRegOrSuperReg) {
+      PRI.Defines = true;     // Reg or a super-register is defined.
+      if (!MO.isDead())
+        AllDefsDead = false;
+    }
+    if (IsRegOrOverlapping)
+      PRI.Clobbers = true;    // Reg or an overlapping reg is defined.
+  }
+
+  if (AllDefsDead && PRI.Defines)
+    PRI.DefinesDead = true;   // Reg or super-register was defined and was dead.
+
+  return PRI;
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm/lib/CodeGen/MachineLICM.cpp
index efec481..169443e 100644
--- a/contrib/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineLICM.cpp
@@ -334,7 +334,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
     DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
   else
     DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
-  DEBUG(dbgs() << MF.getFunction()->getName() << " ********\n");
+  DEBUG(dbgs() << MF.getName() << " ********\n");
 
   if (PreRegAlloc) {
     // Estimate register pressure during pre-regalloc pass.
diff --git a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp
index 9f3829e..27afeec 100644
--- a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp
@@ -74,6 +74,8 @@ MachineBasicBlock *MachineLoop::getBottomBlock() {
   return BotMBB;
 }
 
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void MachineLoop::dump() const {
   print(dbgs());
 }
+#endif
diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
index ea98b23..005bf78 100644
--- a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
@@ -25,7 +25,7 @@
 using namespace llvm;
 using namespace llvm::dwarf;
 
-// Handle the Pass registration stuff necessary to use TargetData's.
+// Handle the Pass registration stuff necessary to use DataLayout's.
 INITIALIZE_PASS(MachineModuleInfo, "machinemoduleinfo",
                 "Machine Module Information", false, false)
 char MachineModuleInfo::ID = 0;
diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp
index 5ab56c0..a1c7e9f 100644
--- a/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp
@@ -21,8 +21,8 @@ using namespace llvm;
 //===----------------------------------------------------------------------===//
 
 // Out of line virtual method.
-void MachineModuleInfoMachO::Anchor() {}
-void MachineModuleInfoELF::Anchor() {}
+void MachineModuleInfoMachO::anchor() {}
+void MachineModuleInfoELF::anchor() {}
 
 static int SortSymbolPair(const void *LHS, const void *RHS) {
   typedef std::pair<MCSymbol*, MachineModuleInfoImpl::StubValueTy> PairTy;
diff --git a/contrib/llvm/lib/CodeGen/MachinePostDominators.cpp b/contrib/llvm/lib/CodeGen/MachinePostDominators.cpp
new file mode 100644
index 0000000..c3f6e92
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachinePostDominators.cpp
@@ -0,0 +1,55 @@
+//===- MachinePostDominators.cpp -Machine Post Dominator Calculation ------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements simple dominator construction algorithms for finding
+// post dominators on machine functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachinePostDominators.h"
+
+using namespace llvm;
+
+char MachinePostDominatorTree::ID = 0;
+
+//declare initializeMachinePostDominatorTreePass
+INITIALIZE_PASS(MachinePostDominatorTree, "machinepostdomtree",
+                "MachinePostDominator Tree Construction", true, true)
+
+MachinePostDominatorTree::MachinePostDominatorTree() : MachineFunctionPass(ID) {
+  initializeMachinePostDominatorTreePass(*PassRegistry::getPassRegistry());
+  DT = new DominatorTreeBase<MachineBasicBlock>(true); //true indicate
+                                                       // postdominator
+}
+
+FunctionPass *
+MachinePostDominatorTree::createMachinePostDominatorTreePass() {
+  return new MachinePostDominatorTree();
+}
+
+bool
+MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F) {
+  DT->recalculate(F);
+  return false;
+}
+
+MachinePostDominatorTree::~MachinePostDominatorTree() {
+  delete DT;
+}
+
+void
+MachinePostDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void
+MachinePostDominatorTree::print(llvm::raw_ostream &OS, const Module *M) const {
+  DT->print(OS);
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
index 5fb938f..95d7a7d 100644
--- a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -21,7 +21,7 @@ MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI)
   : TRI(&TRI), IsSSA(true), TracksLiveness(true) {
   VRegInfo.reserve(256);
   RegAllocHints.reserve(256);
-  UsedPhysRegs.resize(TRI.getNumRegs());
+  UsedRegUnits.resize(TRI.getNumRegUnits());
   UsedPhysRegMask.resize(TRI.getNumRegs());
 
   // Create the physreg use/def lists.
@@ -32,7 +32,7 @@ MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI)
 MachineRegisterInfo::~MachineRegisterInfo() {
 #ifndef NDEBUG
   clearVirtRegs();
-  for (unsigned i = 0, e = UsedPhysRegs.size(); i != e; ++i)
+  for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i)
     assert(!PhysRegUseDefLists[i] &&
            "PhysRegUseDefLists has entries after all instructions are deleted");
 #endif
@@ -306,22 +306,18 @@ void MachineRegisterInfo::dumpUses(unsigned Reg) const {
 
 void MachineRegisterInfo::freezeReservedRegs(const MachineFunction &MF) {
   ReservedRegs = TRI->getReservedRegs(MF);
+  assert(ReservedRegs.size() == TRI->getNumRegs() &&
+         "Invalid ReservedRegs vector from target");
 }
 
 bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg,
                                             const MachineFunction &MF) const {
   assert(TargetRegisterInfo::isPhysicalRegister(PhysReg));
 
-  // Check if any overlapping register is modified.
+  // Check if any overlapping register is modified, or allocatable so it may be
+  // used later.
   for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI)
-    if (!def_empty(*AI))
-      return false;
-
-  // Check if any overlapping register is allocatable so it may be used later.
-  if (AllocatableRegs.empty())
-    AllocatableRegs = TRI->getAllocatableSet(MF);
-  for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI)
-    if (AllocatableRegs.test(*AI))
+    if (!def_empty(*AI) || isAllocatable(*AI))
       return false;
   return true;
 }
diff --git a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
index a1dc948..a4817d0 100644
--- a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -18,11 +18,8 @@
 #include "llvm/CodeGen/MachineScheduler.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/RegisterClassInfo.h"
-#include "llvm/CodeGen/RegisterPressure.h"
-#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/ScheduleDAGILP.h"
 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/MC/MCInstrItineraries.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
@@ -35,10 +32,12 @@
 
 using namespace llvm;
 
-static cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
-                                  cl::desc("Force top-down list scheduling"));
-static cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
-                                  cl::desc("Force bottom-up list scheduling"));
+namespace llvm {
+cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
+                           cl::desc("Force top-down list scheduling"));
+cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
+                            cl::desc("Force bottom-up list scheduling"));
+}
 
 #ifndef NDEBUG
 static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
@@ -50,6 +49,15 @@ static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
 static bool ViewMISchedDAGs = false;
 #endif // NDEBUG
 
+// Threshold to very roughly model an out-of-order processor's instruction
+// buffers. If the actual value of this threshold matters much in practice, then
+// it can be specified by the machine model. For now, it's an experimental
+// tuning knob to determine when and if it matters.
+static cl::opt<unsigned> ILPWindow("ilp-window", cl::Hidden,
+  cl::desc("Allow expected latency to exceed the critical path by N cycles "
+           "before attempting to balance ILP"),
+  cl::init(10U));
+
 //===----------------------------------------------------------------------===//
 // Machine Instruction Scheduling Pass and Registry
 //===----------------------------------------------------------------------===//
@@ -221,7 +229,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
     // The Scheduler may insert instructions during either schedule() or
     // exitRegion(), even for empty regions. So the local iterators 'I' and
     // 'RegionEnd' are invalid across these calls.
-    unsigned RemainingCount = MBB->size();
+    unsigned RemainingInstrs = MBB->size();
     for(MachineBasicBlock::iterator RegionEnd = MBB->end();
         RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) {
 
@@ -230,19 +238,19 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
           || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) {
         --RegionEnd;
         // Count the boundary instruction.
-        --RemainingCount;
+        --RemainingInstrs;
       }
 
       // The next region starts above the previous region. Look backward in the
       // instruction stream until we find the nearest boundary.
       MachineBasicBlock::iterator I = RegionEnd;
-      for(;I != MBB->begin(); --I, --RemainingCount) {
+      for(;I != MBB->begin(); --I, --RemainingInstrs) {
         if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF))
           break;
       }
       // Notify the scheduler of the region, even if we may skip scheduling
       // it. Perhaps it still needs to be bundled.
-      Scheduler->enterRegion(MBB, I, RegionEnd, RemainingCount);
+      Scheduler->enterRegion(MBB, I, RegionEnd, RemainingInstrs);
 
       // Skip empty scheduling regions (0 or 1 schedulable instructions).
       if (I == RegionEnd || I == llvm::prior(RegionEnd)) {
@@ -252,11 +260,11 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
         continue;
       }
       DEBUG(dbgs() << "********** MI Scheduling **********\n");
-      DEBUG(dbgs() << MF->getFunction()->getName()
+      DEBUG(dbgs() << MF->getName()
             << ":BB#" << MBB->getNumber() << "\n  From: " << *I << "    To: ";
             if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
             else dbgs() << "End";
-            dbgs() << " Remaining: " << RemainingCount << "\n");
+            dbgs() << " Remaining: " << RemainingInstrs << "\n");
 
       // Schedule a region: possibly reorder instructions.
       // This invalidates 'RegionEnd' and 'I'.
@@ -269,7 +277,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
       // scheduler for the top of it's scheduled region.
       RegionEnd = Scheduler->begin();
     }
-    assert(RemainingCount == 0 && "Instruction count mismatch!");
+    assert(RemainingInstrs == 0 && "Instruction count mismatch!");
     Scheduler->finishBlock();
   }
   Scheduler->finalizeSchedule();
@@ -281,157 +289,20 @@ void MachineScheduler::print(raw_ostream &O, const Module* m) const {
   // unimplemented
 }
 
-//===----------------------------------------------------------------------===//
-// MachineSchedStrategy - Interface to a machine scheduling algorithm.
-//===----------------------------------------------------------------------===//
-
-namespace {
-class ScheduleDAGMI;
-
-/// MachineSchedStrategy - Interface used by ScheduleDAGMI to drive the selected
-/// scheduling algorithm.
-///
-/// If this works well and targets wish to reuse ScheduleDAGMI, we may expose it
-/// in ScheduleDAGInstrs.h
-class MachineSchedStrategy {
-public:
-  virtual ~MachineSchedStrategy() {}
-
-  /// Initialize the strategy after building the DAG for a new region.
-  virtual void initialize(ScheduleDAGMI *DAG) = 0;
-
-  /// Pick the next node to schedule, or return NULL. Set IsTopNode to true to
-  /// schedule the node at the top of the unscheduled region. Otherwise it will
-  /// be scheduled at the bottom.
-  virtual SUnit *pickNode(bool &IsTopNode) = 0;
-
-  /// Notify MachineSchedStrategy that ScheduleDAGMI has scheduled a node.
-  virtual void schedNode(SUnit *SU, bool IsTopNode) = 0;
-
-  /// When all predecessor dependencies have been resolved, free this node for
-  /// top-down scheduling.
-  virtual void releaseTopNode(SUnit *SU) = 0;
-  /// When all successor dependencies have been resolved, free this node for
-  /// bottom-up scheduling.
-  virtual void releaseBottomNode(SUnit *SU) = 0;
-};
-} // namespace
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void ReadyQueue::dump() {
+  dbgs() << Name << ": ";
+  for (unsigned i = 0, e = Queue.size(); i < e; ++i)
+    dbgs() << Queue[i]->NodeNum << " ";
+  dbgs() << "\n";
+}
+#endif
 
 //===----------------------------------------------------------------------===//
 // ScheduleDAGMI - Base class for MachineInstr scheduling with LiveIntervals
 // preservation.
 //===----------------------------------------------------------------------===//
 
-namespace {
-/// ScheduleDAGMI is an implementation of ScheduleDAGInstrs that schedules
-/// machine instructions while updating LiveIntervals.
-class ScheduleDAGMI : public ScheduleDAGInstrs {
-  AliasAnalysis *AA;
-  RegisterClassInfo *RegClassInfo;
-  MachineSchedStrategy *SchedImpl;
-
-  MachineBasicBlock::iterator LiveRegionEnd;
-
-  /// Register pressure in this region computed by buildSchedGraph.
-  IntervalPressure RegPressure;
-  RegPressureTracker RPTracker;
-
-  /// List of pressure sets that exceed the target's pressure limit before
-  /// scheduling, listed in increasing set ID order. Each pressure set is paired
-  /// with its max pressure in the currently scheduled regions.
-  std::vector<PressureElement> RegionCriticalPSets;
-
-  /// The top of the unscheduled zone.
-  MachineBasicBlock::iterator CurrentTop;
-  IntervalPressure TopPressure;
-  RegPressureTracker TopRPTracker;
-
-  /// The bottom of the unscheduled zone.
-  MachineBasicBlock::iterator CurrentBottom;
-  IntervalPressure BotPressure;
-  RegPressureTracker BotRPTracker;
-
-#ifndef NDEBUG
-  /// The number of instructions scheduled so far. Used to cut off the
-  /// scheduler at the point determined by misched-cutoff.
-  unsigned NumInstrsScheduled;
-#endif
-public:
-  ScheduleDAGMI(MachineSchedContext *C, MachineSchedStrategy *S):
-    ScheduleDAGInstrs(*C->MF, *C->MLI, *C->MDT, /*IsPostRA=*/false, C->LIS),
-    AA(C->AA), RegClassInfo(C->RegClassInfo), SchedImpl(S),
-    RPTracker(RegPressure), CurrentTop(), TopRPTracker(TopPressure),
-    CurrentBottom(), BotRPTracker(BotPressure) {
-#ifndef NDEBUG
-    NumInstrsScheduled = 0;
-#endif
-  }
-
-  ~ScheduleDAGMI() {
-    delete SchedImpl;
-  }
-
-  MachineBasicBlock::iterator top() const { return CurrentTop; }
-  MachineBasicBlock::iterator bottom() const { return CurrentBottom; }
-
-  /// Implement the ScheduleDAGInstrs interface for handling the next scheduling
-  /// region. This covers all instructions in a block, while schedule() may only
-  /// cover a subset.
-  void enterRegion(MachineBasicBlock *bb,
-                   MachineBasicBlock::iterator begin,
-                   MachineBasicBlock::iterator end,
-                   unsigned endcount);
-
-  /// Implement ScheduleDAGInstrs interface for scheduling a sequence of
-  /// reorderable instructions.
-  void schedule();
-
-  /// Get current register pressure for the top scheduled instructions.
-  const IntervalPressure &getTopPressure() const { return TopPressure; }
-  const RegPressureTracker &getTopRPTracker() const { return TopRPTracker; }
-
-  /// Get current register pressure for the bottom scheduled instructions.
-  const IntervalPressure &getBotPressure() const { return BotPressure; }
-  const RegPressureTracker &getBotRPTracker() const { return BotRPTracker; }
-
-  /// Get register pressure for the entire scheduling region before scheduling.
-  const IntervalPressure &getRegPressure() const { return RegPressure; }
-
-  const std::vector<PressureElement> &getRegionCriticalPSets() const {
-    return RegionCriticalPSets;
-  }
-
-  /// getIssueWidth - Return the max instructions per scheduling group.
-  unsigned getIssueWidth() const {
-    return (InstrItins && InstrItins->SchedModel)
-      ? InstrItins->SchedModel->IssueWidth : 1;
-  }
-
-  /// getNumMicroOps - Return the number of issue slots required for this MI.
-  unsigned getNumMicroOps(MachineInstr *MI) const {
-    if (!InstrItins) return 1;
-    int UOps = InstrItins->getNumMicroOps(MI->getDesc().getSchedClass());
-    return (UOps >= 0) ? UOps : TII->getNumMicroOps(InstrItins, MI);
-  }
-
-protected:
-  void initRegPressure();
-  void updateScheduledPressure(std::vector<unsigned> NewMaxPressure);
-
-  void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos);
-  bool checkSchedLimit();
-
-  void releaseRoots();
-
-  void releaseSucc(SUnit *SU, SDep *SuccEdge);
-  void releaseSuccessors(SUnit *SU);
-  void releasePred(SUnit *SU, SDep *PredEdge);
-  void releasePredecessors(SUnit *SU);
-
-  void placeDebugValues();
-};
-} // namespace
-
 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
 /// NumPredsLeft reaches zero, release the successor node.
 ///
@@ -498,7 +369,7 @@ void ScheduleDAGMI::moveInstruction(MachineInstr *MI,
   BB->splice(InsertPos, BB, MI);
 
   // Update LiveIntervals
-  LIS->handleMove(MI);
+  LIS->handleMove(MI, /*UpdateFlags=*/true);
 
   // Recede RegionBegin if an instruction moves above the first.
   if (RegionBegin == InsertPos)
@@ -565,6 +436,9 @@ void ScheduleDAGMI::initRegPressure() {
   std::vector<unsigned> RegionPressure = RPTracker.getPressure().MaxSetPressure;
   for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
     unsigned Limit = TRI->getRegPressureSetLimit(i);
+    DEBUG(dbgs() << TRI->getRegPressureSetName(i)
+          << "Limit " << Limit
+          << " Actual " << RegionPressure[i] << "\n");
     if (RegionPressure[i] > Limit)
       RegionCriticalPSets.push_back(PressureElement(i, 0));
   }
@@ -587,6 +461,74 @@ updateScheduledPressure(std::vector<unsigned> NewMaxPressure) {
   }
 }
 
+/// schedule - Called back from MachineScheduler::runOnMachineFunction
+/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
+/// only includes instructions that have DAG nodes, not scheduling boundaries.
+///
+/// This is a skeletal driver, with all the functionality pushed into helpers,
+/// so that it can be easilly extended by experimental schedulers. Generally,
+/// implementing MachineSchedStrategy should be sufficient to implement a new
+/// scheduling algorithm. However, if a scheduler further subclasses
+/// ScheduleDAGMI then it will want to override this virtual method in order to
+/// update any specialized state.
+void ScheduleDAGMI::schedule() {
+  buildDAGWithRegPressure();
+
+  postprocessDAG();
+
+  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+          SUnits[su].dumpAll(this));
+
+  if (ViewMISchedDAGs) viewGraph();
+
+  initQueues();
+
+  bool IsTopNode = false;
+  while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
+    assert(!SU->isScheduled && "Node already scheduled");
+    if (!checkSchedLimit())
+      break;
+
+    scheduleMI(SU, IsTopNode);
+
+    updateQueues(SU, IsTopNode);
+  }
+  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
+
+  placeDebugValues();
+
+  DEBUG({
+      unsigned BBNum = top()->getParent()->getNumber();
+      dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
+      dumpSchedule();
+      dbgs() << '\n';
+    });
+}
+
+/// Build the DAG and setup three register pressure trackers.
+void ScheduleDAGMI::buildDAGWithRegPressure() {
+  // Initialize the register pressure tracker used by buildSchedGraph.
+  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
+
+  // Account for liveness generate by the region boundary.
+  if (LiveRegionEnd != RegionEnd)
+    RPTracker.recede();
+
+  // Build the DAG, and compute current register pressure.
+  buildSchedGraph(AA, &RPTracker);
+  if (ViewMISchedDAGs) viewGraph();
+
+  // Initialize top/bottom trackers after computing region pressure.
+  initRegPressure();
+}
+
+/// Apply each ScheduleDAGMutation step in order.
+void ScheduleDAGMI::postprocessDAG() {
+  for (unsigned i = 0, e = Mutations.size(); i < e; ++i) {
+    Mutations[i]->apply(this);
+  }
+}
+
 // Release all DAG roots for scheduling.
 void ScheduleDAGMI::releaseRoots() {
   SmallVector<SUnit*, 16> BotRoots;
@@ -607,28 +549,10 @@ void ScheduleDAGMI::releaseRoots() {
     SchedImpl->releaseBottomNode(*I);
 }
 
-/// schedule - Called back from MachineScheduler::runOnMachineFunction
-/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
-/// only includes instructions that have DAG nodes, not scheduling boundaries.
-void ScheduleDAGMI::schedule() {
-  // Initialize the register pressure tracker used by buildSchedGraph.
-  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
-
-  // Account for liveness generate by the region boundary.
-  if (LiveRegionEnd != RegionEnd)
-    RPTracker.recede();
-
-  // Build the DAG, and compute current register pressure.
-  buildSchedGraph(AA, &RPTracker);
-
-  // Initialize top/bottom trackers after computing region pressure.
-  initRegPressure();
-
-  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
-          SUnits[su].dumpAll(this));
-
-  if (ViewMISchedDAGs) viewGraph();
+/// Identify DAG roots and setup scheduler queues.
+void ScheduleDAGMI::initQueues() {
 
+  // Initialize the strategy before modifying the DAG.
   SchedImpl->initialize(this);
 
   // Release edges from the special Entry node or to the special Exit node.
@@ -638,61 +562,64 @@ void ScheduleDAGMI::schedule() {
   // Release all DAG roots for scheduling.
   releaseRoots();
 
+  SchedImpl->registerRoots();
+
   CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
   CurrentBottom = RegionEnd;
-  bool IsTopNode = false;
-  while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
-    if (!checkSchedLimit())
-      break;
-
-    // Move the instruction to its new location in the instruction stream.
-    MachineInstr *MI = SU->getInstr();
-
-    if (IsTopNode) {
-      assert(SU->isTopReady() && "node still has unscheduled dependencies");
-      if (&*CurrentTop == MI)
-        CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
-      else {
-        moveInstruction(MI, CurrentTop);
-        TopRPTracker.setPos(MI);
-      }
+}
 
-      // Update top scheduled pressure.
-      TopRPTracker.advance();
-      assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
-      updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure);
+/// Move an instruction and update register pressure.
+void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) {
+  // Move the instruction to its new location in the instruction stream.
+  MachineInstr *MI = SU->getInstr();
 
-      // Release dependent instructions for scheduling.
-      releaseSuccessors(SU);
+  if (IsTopNode) {
+    assert(SU->isTopReady() && "node still has unscheduled dependencies");
+    if (&*CurrentTop == MI)
+      CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
+    else {
+      moveInstruction(MI, CurrentTop);
+      TopRPTracker.setPos(MI);
     }
+
+    // Update top scheduled pressure.
+    TopRPTracker.advance();
+    assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
+    updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure);
+  }
+  else {
+    assert(SU->isBottomReady() && "node still has unscheduled dependencies");
+    MachineBasicBlock::iterator priorII =
+      priorNonDebug(CurrentBottom, CurrentTop);
+    if (&*priorII == MI)
+      CurrentBottom = priorII;
     else {
-      assert(SU->isBottomReady() && "node still has unscheduled dependencies");
-      MachineBasicBlock::iterator priorII =
-        priorNonDebug(CurrentBottom, CurrentTop);
-      if (&*priorII == MI)
-        CurrentBottom = priorII;
-      else {
-        if (&*CurrentTop == MI) {
-          CurrentTop = nextIfDebug(++CurrentTop, priorII);
-          TopRPTracker.setPos(CurrentTop);
-        }
-        moveInstruction(MI, CurrentBottom);
-        CurrentBottom = MI;
+      if (&*CurrentTop == MI) {
+        CurrentTop = nextIfDebug(++CurrentTop, priorII);
+        TopRPTracker.setPos(CurrentTop);
       }
-      // Update bottom scheduled pressure.
-      BotRPTracker.recede();
-      assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
-      updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure);
-
-      // Release dependent instructions for scheduling.
-      releasePredecessors(SU);
+      moveInstruction(MI, CurrentBottom);
+      CurrentBottom = MI;
     }
-    SU->isScheduled = true;
-    SchedImpl->schedNode(SU, IsTopNode);
+    // Update bottom scheduled pressure.
+    BotRPTracker.recede();
+    assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
+    updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure);
   }
-  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
+}
 
-  placeDebugValues();
+/// Update scheduler queues after scheduling an instruction.
+void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
+  // Release dependent instructions for scheduling.
+  if (IsTopNode)
+    releaseSuccessors(SU);
+  else
+    releasePredecessors(SU);
+
+  SU->isScheduled = true;
+
+  // Notify the scheduling strategy after updating the DAG.
+  SchedImpl->schedNode(SU, IsTopNode);
 }
 
 /// Reinsert any remaining debug_values, just like the PostRA scheduler.
@@ -716,91 +643,146 @@ void ScheduleDAGMI::placeDebugValues() {
   FirstDbgValue = NULL;
 }
 
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void ScheduleDAGMI::dumpSchedule() const {
+  for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
+    if (SUnit *SU = getSUnit(&(*MI)))
+      SU->dump(this);
+    else
+      dbgs() << "Missing SUnit\n";
+  }
+}
+#endif
+
 //===----------------------------------------------------------------------===//
 // ConvergingScheduler - Implementation of the standard MachineSchedStrategy.
 //===----------------------------------------------------------------------===//
 
 namespace {
-/// ReadyQueue encapsulates vector of "ready" SUnits with basic convenience
-/// methods for pushing and removing nodes. ReadyQueue's are uniquely identified
-/// by an ID. SUnit::NodeQueueId is a mask of the ReadyQueues the SUnit is in.
-class ReadyQueue {
-  unsigned ID;
-  std::string Name;
-  std::vector<SUnit*> Queue;
-
+/// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance
+/// the schedule.
+class ConvergingScheduler : public MachineSchedStrategy {
 public:
-  ReadyQueue(unsigned id, const Twine &name): ID(id), Name(name.str()) {}
-
-  unsigned getID() const { return ID; }
-
-  StringRef getName() const { return Name; }
-
-  // SU is in this queue if it's NodeQueueID is a superset of this ID.
-  bool isInQueue(SUnit *SU) const { return (SU->NodeQueueId & ID); }
-
-  bool empty() const { return Queue.empty(); }
-
-  unsigned size() const { return Queue.size(); }
-
-  typedef std::vector<SUnit*>::iterator iterator;
+  /// Represent the type of SchedCandidate found within a single queue.
+  /// pickNodeBidirectional depends on these listed by decreasing priority.
+  enum CandReason {
+    NoCand, SingleExcess, SingleCritical, ResourceReduce, ResourceDemand,
+    BotHeightReduce, BotPathReduce, TopDepthReduce, TopPathReduce,
+    SingleMax, MultiPressure, NextDefUse, NodeOrder};
 
-  iterator begin() { return Queue.begin(); }
+#ifndef NDEBUG
+  static const char *getReasonStr(ConvergingScheduler::CandReason Reason);
+#endif
 
-  iterator end() { return Queue.end(); }
+  /// Policy for scheduling the next instruction in the candidate's zone.
+  struct CandPolicy {
+    bool ReduceLatency;
+    unsigned ReduceResIdx;
+    unsigned DemandResIdx;
 
-  iterator find(SUnit *SU) {
-    return std::find(Queue.begin(), Queue.end(), SU);
-  }
+    CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {}
+  };
 
-  void push(SUnit *SU) {
-    Queue.push_back(SU);
-    SU->NodeQueueId |= ID;
-  }
+  /// Status of an instruction's critical resource consumption.
+  struct SchedResourceDelta {
+    // Count critical resources in the scheduled region required by SU.
+    unsigned CritResources;
 
-  void remove(iterator I) {
-    (*I)->NodeQueueId &= ~ID;
-    *I = Queue.back();
-    Queue.pop_back();
-  }
+    // Count critical resources from another region consumed by SU.
+    unsigned DemandedResources;
 
-  void dump() {
-    dbgs() << Name << ": ";
-    for (unsigned i = 0, e = Queue.size(); i < e; ++i)
-      dbgs() << Queue[i]->NodeNum << " ";
-    dbgs() << "\n";
-  }
-};
+    SchedResourceDelta(): CritResources(0), DemandedResources(0) {}
 
-/// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance
-/// the schedule.
-class ConvergingScheduler : public MachineSchedStrategy {
+    bool operator==(const SchedResourceDelta &RHS) const {
+      return CritResources == RHS.CritResources
+        && DemandedResources == RHS.DemandedResources;
+    }
+    bool operator!=(const SchedResourceDelta &RHS) const {
+      return !operator==(RHS);
+    }
+  };
 
   /// Store the state used by ConvergingScheduler heuristics, required for the
   /// lifetime of one invocation of pickNode().
   struct SchedCandidate {
+    CandPolicy Policy;
+
     // The best SUnit candidate.
     SUnit *SU;
 
+    // The reason for this candidate.
+    CandReason Reason;
+
     // Register pressure values for the best candidate.
     RegPressureDelta RPDelta;
 
-    SchedCandidate(): SU(NULL) {}
+    // Critical resource consumption of the best candidate.
+    SchedResourceDelta ResDelta;
+
+    SchedCandidate(const CandPolicy &policy)
+    : Policy(policy), SU(NULL), Reason(NoCand) {}
+
+    bool isValid() const { return SU; }
+
+    // Copy the status of another candidate without changing policy.
+    void setBest(SchedCandidate &Best) {
+      assert(Best.Reason != NoCand && "uninitialized Sched candidate");
+      SU = Best.SU;
+      Reason = Best.Reason;
+      RPDelta = Best.RPDelta;
+      ResDelta = Best.ResDelta;
+    }
+
+    void initResourceDelta(const ScheduleDAGMI *DAG,
+                           const TargetSchedModel *SchedModel);
+  };
+
+  /// Summarize the unscheduled region.
+  struct SchedRemainder {
+    // Critical path through the DAG in expected latency.
+    unsigned CriticalPath;
+
+    // Unscheduled resources
+    SmallVector<unsigned, 16> RemainingCounts;
+    // Critical resource for the unscheduled zone.
+    unsigned CritResIdx;
+    // Number of micro-ops left to schedule.
+    unsigned RemainingMicroOps;
+    // Is the unscheduled zone resource limited.
+    bool IsResourceLimited;
+
+    unsigned MaxRemainingCount;
+
+    void reset() {
+      CriticalPath = 0;
+      RemainingCounts.clear();
+      CritResIdx = 0;
+      RemainingMicroOps = 0;
+      IsResourceLimited = false;
+      MaxRemainingCount = 0;
+    }
+
+    SchedRemainder() { reset(); }
+
+    void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel);
   };
-  /// Represent the type of SchedCandidate found within a single queue.
-  enum CandResult {
-    NoCand, NodeOrder, SingleExcess, SingleCritical, SingleMax, MultiPressure };
 
   /// Each Scheduling boundary is associated with ready queues. It tracks the
-  /// current cycle in whichever direction at has moved, and maintains the state
+  /// current cycle in the direction of movement, and maintains the state
   /// of "hazards" and other interlocks at the current cycle.
   struct SchedBoundary {
     ScheduleDAGMI *DAG;
+    const TargetSchedModel *SchedModel;
+    SchedRemainder *Rem;
 
     ReadyQueue Available;
     ReadyQueue Pending;
     bool CheckPending;
 
+    // For heuristics, keep a list of the nodes that immediately depend on the
+    // most recently scheduled node.
+    SmallPtrSet<const SUnit*, 8> NextSUs;
+
     ScheduleHazardRecognizer *HazardRec;
 
     unsigned CurrCycle;
@@ -809,29 +791,88 @@ class ConvergingScheduler : public MachineSchedStrategy {
     /// MinReadyCycle - Cycle of the soonest available instruction.
     unsigned MinReadyCycle;
 
+    // The expected latency of the critical path in this scheduled zone.
+    unsigned ExpectedLatency;
+
+    // Resources used in the scheduled zone beyond this boundary.
+    SmallVector<unsigned, 16> ResourceCounts;
+
+    // Cache the critical resources ID in this scheduled zone.
+    unsigned CritResIdx;
+
+    // Is the scheduled region resource limited vs. latency limited.
+    bool IsResourceLimited;
+
+    unsigned ExpectedCount;
+
+    // Policy flag: attempt to find ILP until expected latency is covered.
+    bool ShouldIncreaseILP;
+
+#ifndef NDEBUG
     // Remember the greatest min operand latency.
     unsigned MaxMinLatency;
+#endif
+
+    void reset() {
+      Available.clear();
+      Pending.clear();
+      CheckPending = false;
+      NextSUs.clear();
+      HazardRec = 0;
+      CurrCycle = 0;
+      IssueCount = 0;
+      MinReadyCycle = UINT_MAX;
+      ExpectedLatency = 0;
+      ResourceCounts.resize(1);
+      assert(!ResourceCounts[0] && "nonzero count for bad resource");
+      CritResIdx = 0;
+      IsResourceLimited = false;
+      ExpectedCount = 0;
+      ShouldIncreaseILP = false;
+#ifndef NDEBUG
+      MaxMinLatency = 0;
+#endif
+      // Reserve a zero-count for invalid CritResIdx.
+      ResourceCounts.resize(1);
+    }
 
     /// Pending queues extend the ready queues with the same ID and the
     /// PendingFlag set.
     SchedBoundary(unsigned ID, const Twine &Name):
-      DAG(0), Available(ID, Name+".A"),
-      Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P"),
-      CheckPending(false), HazardRec(0), CurrCycle(0), IssueCount(0),
-      MinReadyCycle(UINT_MAX), MaxMinLatency(0) {}
+      DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"),
+      Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P") {
+      reset();
+    }
 
     ~SchedBoundary() { delete HazardRec; }
 
+    void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel,
+              SchedRemainder *rem);
+
     bool isTop() const {
       return Available.getID() == ConvergingScheduler::TopQID;
     }
 
+    unsigned getUnscheduledLatency(SUnit *SU) const {
+      if (isTop())
+        return SU->getHeight();
+      return SU->getDepth();
+    }
+
+    unsigned getCriticalCount() const {
+      return ResourceCounts[CritResIdx];
+    }
+
     bool checkHazard(SUnit *SU);
 
+    void checkILPPolicy();
+
     void releaseNode(SUnit *SU, unsigned ReadyCycle);
 
     void bumpCycle();
 
+    void countResource(unsigned PIdx, unsigned Cycles);
+
     void bumpNode(SUnit *SU);
 
     void releasePending();
@@ -841,10 +882,13 @@ class ConvergingScheduler : public MachineSchedStrategy {
     SUnit *pickOnlyChoice();
   };
 
+private:
   ScheduleDAGMI *DAG;
+  const TargetSchedModel *SchedModel;
   const TargetRegisterInfo *TRI;
 
   // State of the top and bottom scheduled instruction boundaries.
+  SchedRemainder Rem;
   SchedBoundary Top;
   SchedBoundary Bot;
 
@@ -857,7 +901,7 @@ public:
   };
 
   ConvergingScheduler():
-    DAG(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}
+    DAG(0), SchedModel(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}
 
   virtual void initialize(ScheduleDAGMI *dag);
 
@@ -869,28 +913,80 @@ public:
 
   virtual void releaseBottomNode(SUnit *SU);
 
+  virtual void registerRoots();
+
 protected:
-  SUnit *pickNodeBidrectional(bool &IsTopNode);
+  void balanceZones(
+    ConvergingScheduler::SchedBoundary &CriticalZone,
+    ConvergingScheduler::SchedCandidate &CriticalCand,
+    ConvergingScheduler::SchedBoundary &OppositeZone,
+    ConvergingScheduler::SchedCandidate &OppositeCand);
+
+  void checkResourceLimits(ConvergingScheduler::SchedCandidate &TopCand,
+                           ConvergingScheduler::SchedCandidate &BotCand);
+
+  void tryCandidate(SchedCandidate &Cand,
+                    SchedCandidate &TryCand,
+                    SchedBoundary &Zone,
+                    const RegPressureTracker &RPTracker,
+                    RegPressureTracker &TempTracker);
+
+  SUnit *pickNodeBidirectional(bool &IsTopNode);
+
+  void pickNodeFromQueue(SchedBoundary &Zone,
+                         const RegPressureTracker &RPTracker,
+                         SchedCandidate &Candidate);
 
-  CandResult pickNodeFromQueue(ReadyQueue &Q,
-                               const RegPressureTracker &RPTracker,
-                               SchedCandidate &Candidate);
 #ifndef NDEBUG
-  void traceCandidate(const char *Label, const ReadyQueue &Q, SUnit *SU,
-                      PressureElement P = PressureElement());
+  void traceCandidate(const SchedCandidate &Cand, const SchedBoundary &Zone);
 #endif
 };
 } // namespace
 
+void ConvergingScheduler::SchedRemainder::
+init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
+  reset();
+  if (!SchedModel->hasInstrSchedModel())
+    return;
+  RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
+  for (std::vector<SUnit>::iterator
+         I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) {
+    const MCSchedClassDesc *SC = DAG->getSchedClass(&*I);
+    RemainingMicroOps += SchedModel->getNumMicroOps(I->getInstr(), SC);
+    for (TargetSchedModel::ProcResIter
+           PI = SchedModel->getWriteProcResBegin(SC),
+           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+      unsigned PIdx = PI->ProcResourceIdx;
+      unsigned Factor = SchedModel->getResourceFactor(PIdx);
+      RemainingCounts[PIdx] += (Factor * PI->Cycles);
+    }
+  }
+}
+
+void ConvergingScheduler::SchedBoundary::
+init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
+  reset();
+  DAG = dag;
+  SchedModel = smodel;
+  Rem = rem;
+  if (SchedModel->hasInstrSchedModel())
+    ResourceCounts.resize(SchedModel->getNumProcResourceKinds());
+}
+
 void ConvergingScheduler::initialize(ScheduleDAGMI *dag) {
   DAG = dag;
+  SchedModel = DAG->getSchedModel();
   TRI = DAG->TRI;
-  Top.DAG = dag;
-  Bot.DAG = dag;
+  Rem.init(DAG, SchedModel);
+  Top.init(DAG, SchedModel, &Rem);
+  Bot.init(DAG, SchedModel, &Rem);
+
+  // Initialize resource counts.
 
-  // Initialize the HazardRecognizers.
+  // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
+  // are disabled, then these HazardRecs will be disabled.
+  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
   const TargetMachine &TM = DAG->MF.getTarget();
-  const InstrItineraryData *Itin = TM.getInstrItineraryData();
   Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
   Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
 
@@ -905,13 +1001,12 @@ void ConvergingScheduler::releaseTopNode(SUnit *SU) {
   for (SUnit::succ_iterator I = SU->Preds.begin(), E = SU->Preds.end();
        I != E; ++I) {
     unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
-    unsigned Latency =
-      DAG->computeOperandLatency(I->getSUnit(), SU, *I, /*FindMin=*/true);
+    unsigned MinLatency = I->getMinLatency();
 #ifndef NDEBUG
-    Top.MaxMinLatency = std::max(Latency, Top.MaxMinLatency);
+    Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
 #endif
-    if (SU->TopReadyCycle < PredReadyCycle + Latency)
-      SU->TopReadyCycle = PredReadyCycle + Latency;
+    if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
+      SU->TopReadyCycle = PredReadyCycle + MinLatency;
   }
   Top.releaseNode(SU, SU->TopReadyCycle);
 }
@@ -925,17 +1020,27 @@ void ConvergingScheduler::releaseBottomNode(SUnit *SU) {
   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
        I != E; ++I) {
     unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
-    unsigned Latency =
-      DAG->computeOperandLatency(SU, I->getSUnit(), *I, /*FindMin=*/true);
+    unsigned MinLatency = I->getMinLatency();
 #ifndef NDEBUG
-    Bot.MaxMinLatency = std::max(Latency, Bot.MaxMinLatency);
+    Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
 #endif
-    if (SU->BotReadyCycle < SuccReadyCycle + Latency)
-      SU->BotReadyCycle = SuccReadyCycle + Latency;
+    if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
+      SU->BotReadyCycle = SuccReadyCycle + MinLatency;
   }
   Bot.releaseNode(SU, SU->BotReadyCycle);
 }
 
+void ConvergingScheduler::registerRoots() {
+  Rem.CriticalPath = DAG->ExitSU.getDepth();
+  // Some roots may not feed into ExitSU. Check all of them in case.
+  for (std::vector<SUnit*>::const_iterator
+         I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
+    if ((*I)->getDepth() > Rem.CriticalPath)
+      Rem.CriticalPath = (*I)->getDepth();
+  }
+  DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
+}
+
 /// Does this SU have a hazard within the current instruction group.
 ///
 /// The scheduler supports two modes of hazard recognition. The first is the
@@ -953,14 +1058,27 @@ bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) {
   if (HazardRec->isEnabled())
     return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
 
-  if (IssueCount + DAG->getNumMicroOps(SU->getInstr()) > DAG->getIssueWidth())
+  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
+  if ((IssueCount > 0) && (IssueCount + uops > SchedModel->getIssueWidth())) {
+    DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops="
+          << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
     return true;
-
+  }
   return false;
 }
 
+/// If expected latency is covered, disable ILP policy.
+void ConvergingScheduler::SchedBoundary::checkILPPolicy() {
+  if (ShouldIncreaseILP
+      && (IsResourceLimited || ExpectedLatency <= CurrCycle)) {
+    ShouldIncreaseILP = false;
+    DEBUG(dbgs() << "Disable ILP: " << Available.getName() << '\n');
+  }
+}
+
 void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,
                                                      unsigned ReadyCycle) {
+
   if (ReadyCycle < MinReadyCycle)
     MinReadyCycle = ReadyCycle;
 
@@ -970,15 +1088,31 @@ void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,
     Pending.push(SU);
   else
     Available.push(SU);
+
+  // Record this node as an immediate dependent of the scheduled node.
+  NextSUs.insert(SU);
+
+  // If CriticalPath has been computed, then check if the unscheduled nodes
+  // exceed the ILP window. Before registerRoots, CriticalPath==0.
+  if (Rem->CriticalPath && (ExpectedLatency + getUnscheduledLatency(SU)
+                            > Rem->CriticalPath + ILPWindow)) {
+    ShouldIncreaseILP = true;
+    DEBUG(dbgs() << "Increase ILP: " << Available.getName() << " "
+          << ExpectedLatency << " + " << getUnscheduledLatency(SU) << '\n');
+  }
 }
 
 /// Move the boundary of scheduled code by one cycle.
 void ConvergingScheduler::SchedBoundary::bumpCycle() {
-  unsigned Width = DAG->getIssueWidth();
+  unsigned Width = SchedModel->getIssueWidth();
   IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;
 
+  unsigned NextCycle = CurrCycle + 1;
   assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
-  unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle);
+  if (MinReadyCycle > NextCycle) {
+    IssueCount = 0;
+    NextCycle = MinReadyCycle;
+  }
 
   if (!HazardRec->isEnabled()) {
     // Bypass HazardRec virtual calls.
@@ -994,11 +1128,39 @@ void ConvergingScheduler::SchedBoundary::bumpCycle() {
     }
   }
   CheckPending = true;
+  IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);
 
-  DEBUG(dbgs() << "*** " << Available.getName() << " cycle "
+  DEBUG(dbgs() << "  *** " << Available.getName() << " cycle "
         << CurrCycle << '\n');
 }
 
+/// Add the given processor resource to this scheduled zone.
+void ConvergingScheduler::SchedBoundary::countResource(unsigned PIdx,
+                                                       unsigned Cycles) {
+  unsigned Factor = SchedModel->getResourceFactor(PIdx);
+  DEBUG(dbgs() << "  " << SchedModel->getProcResource(PIdx)->Name
+        << " +(" << Cycles << "x" << Factor
+        << ") / " << SchedModel->getLatencyFactor() << '\n');
+
+  unsigned Count = Factor * Cycles;
+  ResourceCounts[PIdx] += Count;
+  assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
+  Rem->RemainingCounts[PIdx] -= Count;
+
+  // Reset MaxRemainingCount for sanity.
+  Rem->MaxRemainingCount = 0;
+
+  // Check if this resource exceeds the current critical resource by a full
+  // cycle. If so, it becomes the critical resource.
+  if ((int)(ResourceCounts[PIdx] - ResourceCounts[CritResIdx])
+      >= (int)SchedModel->getLatencyFactor()) {
+    CritResIdx = PIdx;
+    DEBUG(dbgs() << "  *** Critical resource "
+          << SchedModel->getProcResource(PIdx)->Name << " x"
+          << ResourceCounts[PIdx] << '\n');
+  }
+}
+
 /// Move the boundary of scheduled code by one SUnit.
 void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) {
   // Update the reservation table.
@@ -1010,11 +1172,38 @@ void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) {
     }
     HazardRec->EmitInstruction(SU);
   }
+  // Update resource counts and critical resource.
+  if (SchedModel->hasInstrSchedModel()) {
+    const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+    Rem->RemainingMicroOps -= SchedModel->getNumMicroOps(SU->getInstr(), SC);
+    for (TargetSchedModel::ProcResIter
+           PI = SchedModel->getWriteProcResBegin(SC),
+           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+      countResource(PI->ProcResourceIdx, PI->Cycles);
+    }
+  }
+  if (isTop()) {
+    if (SU->getDepth() > ExpectedLatency)
+      ExpectedLatency = SU->getDepth();
+  }
+  else {
+    if (SU->getHeight() > ExpectedLatency)
+      ExpectedLatency = SU->getHeight();
+  }
+
+  IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);
+
   // Check the instruction group dispatch limit.
   // TODO: Check if this SU must end a dispatch group.
-  IssueCount += DAG->getNumMicroOps(SU->getInstr());
-  if (IssueCount >= DAG->getIssueWidth()) {
-    DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
+  IssueCount += SchedModel->getNumMicroOps(SU->getInstr());
+
+  // checkHazard prevents scheduling multiple instructions per cycle that exceed
+  // issue width. However, we commonly reach the maximum. In this case
+  // opportunistically bump the cycle to avoid uselessly checking everything in
+  // the readyQ. Furthermore, a single instruction may produce more than one
+  // cycle's worth of micro-ops.
+  if (IssueCount >= SchedModel->getIssueWidth()) {
+    DEBUG(dbgs() << "  *** Max instrs at cycle " << CurrCycle << '\n');
     bumpCycle();
   }
 }
@@ -1045,6 +1234,7 @@ void ConvergingScheduler::SchedBoundary::releasePending() {
     Pending.remove(Pending.begin()+i);
     --i; --e;
   }
+  DEBUG(if (!Pending.empty()) Pending.dump());
   CheckPending = false;
 }
 
@@ -1059,12 +1249,23 @@ void ConvergingScheduler::SchedBoundary::removeReady(SUnit *SU) {
 }
 
 /// If this queue only has one ready candidate, return it. As a side effect,
-/// advance the cycle until at least one node is ready. If multiple instructions
-/// are ready, return NULL.
+/// defer any nodes that now hit a hazard, and advance the cycle until at least
+/// one node is ready. If multiple instructions are ready, return NULL.
 SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
   if (CheckPending)
     releasePending();
 
+  if (IssueCount > 0) {
+    // Defer any ready instrs that now have a hazard.
+    for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
+      if (checkHazard(*I)) {
+        Pending.push(*I);
+        I = Available.remove(I);
+        continue;
+      }
+      ++I;
+    }
+  }
   for (unsigned i = 0; Available.empty(); ++i) {
     assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
            "permanent hazard"); (void)i;
@@ -1076,18 +1277,262 @@ SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
   return NULL;
 }
 
-#ifndef NDEBUG
-void ConvergingScheduler::traceCandidate(const char *Label, const ReadyQueue &Q,
-                                         SUnit *SU, PressureElement P) {
-  dbgs() << Label << " " << Q.getName() << " ";
-  if (P.isValid())
-    dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease
-           << " ";
-  else
-    dbgs() << "     ";
-  SU->dump(DAG);
+/// Record the candidate policy for opposite zones with different critical
+/// resources.
+///
+/// If the CriticalZone is latency limited, don't force a policy for the
+/// candidates here. Instead, When releasing each candidate, releaseNode
+/// compares the region's critical path to the candidate's height or depth and
+/// the scheduled zone's expected latency then sets ShouldIncreaseILP.
+void ConvergingScheduler::balanceZones(
+  ConvergingScheduler::SchedBoundary &CriticalZone,
+  ConvergingScheduler::SchedCandidate &CriticalCand,
+  ConvergingScheduler::SchedBoundary &OppositeZone,
+  ConvergingScheduler::SchedCandidate &OppositeCand) {
+
+  if (!CriticalZone.IsResourceLimited)
+    return;
+
+  SchedRemainder *Rem = CriticalZone.Rem;
+
+  // If the critical zone is overconsuming a resource relative to the
+  // remainder, try to reduce it.
+  unsigned RemainingCritCount =
+    Rem->RemainingCounts[CriticalZone.CritResIdx];
+  if ((int)(Rem->MaxRemainingCount - RemainingCritCount)
+      > (int)SchedModel->getLatencyFactor()) {
+    CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx;
+    DEBUG(dbgs() << "Balance " << CriticalZone.Available.getName() << " reduce "
+          << SchedModel->getProcResource(CriticalZone.CritResIdx)->Name
+          << '\n');
+  }
+  // If the other zone is underconsuming a resource relative to the full zone,
+  // try to increase it.
+  unsigned OppositeCount =
+    OppositeZone.ResourceCounts[CriticalZone.CritResIdx];
+  if ((int)(OppositeZone.ExpectedCount - OppositeCount)
+      > (int)SchedModel->getLatencyFactor()) {
+    OppositeCand.Policy.DemandResIdx = CriticalZone.CritResIdx;
+    DEBUG(dbgs() << "Balance " << OppositeZone.Available.getName() << " demand "
+          << SchedModel->getProcResource(OppositeZone.CritResIdx)->Name
+          << '\n');
+  }
+}
+
+/// Determine if the scheduled zones exceed resource limits or critical path and
+/// set each candidate's ReduceHeight policy accordingly.
+void ConvergingScheduler::checkResourceLimits(
+  ConvergingScheduler::SchedCandidate &TopCand,
+  ConvergingScheduler::SchedCandidate &BotCand) {
+
+  Bot.checkILPPolicy();
+  Top.checkILPPolicy();
+  if (Bot.ShouldIncreaseILP)
+    BotCand.Policy.ReduceLatency = true;
+  if (Top.ShouldIncreaseILP)
+    TopCand.Policy.ReduceLatency = true;
+
+  // Handle resource-limited regions.
+  if (Top.IsResourceLimited && Bot.IsResourceLimited
+      && Top.CritResIdx == Bot.CritResIdx) {
+    // If the scheduled critical resource in both zones is no longer the
+    // critical remaining resource, attempt to reduce resource height both ways.
+    if (Top.CritResIdx != Rem.CritResIdx) {
+      TopCand.Policy.ReduceResIdx = Top.CritResIdx;
+      BotCand.Policy.ReduceResIdx = Bot.CritResIdx;
+      DEBUG(dbgs() << "Reduce scheduled "
+            << SchedModel->getProcResource(Top.CritResIdx)->Name << '\n');
+    }
+    return;
+  }
+  // Handle latency-limited regions.
+  if (!Top.IsResourceLimited && !Bot.IsResourceLimited) {
+    // If the total scheduled expected latency exceeds the region's critical
+    // path then reduce latency both ways.
+    //
+    // Just because a zone is not resource limited does not mean it is latency
+    // limited. Unbuffered resource, such as max micro-ops may cause CurrCycle
+    // to exceed expected latency.
+    if ((Top.ExpectedLatency + Bot.ExpectedLatency >= Rem.CriticalPath)
+        && (Rem.CriticalPath > Top.CurrCycle + Bot.CurrCycle)) {
+      TopCand.Policy.ReduceLatency = true;
+      BotCand.Policy.ReduceLatency = true;
+      DEBUG(dbgs() << "Reduce scheduled latency " << Top.ExpectedLatency
+            << " + " << Bot.ExpectedLatency << '\n');
+    }
+    return;
+  }
+  // The critical resource is different in each zone, so request balancing.
+
+  // Compute the cost of each zone.
+  Rem.MaxRemainingCount = std::max(
+    Rem.RemainingMicroOps * SchedModel->getMicroOpFactor(),
+    Rem.RemainingCounts[Rem.CritResIdx]);
+  Top.ExpectedCount = std::max(Top.ExpectedLatency, Top.CurrCycle);
+  Top.ExpectedCount = std::max(
+    Top.getCriticalCount(),
+    Top.ExpectedCount * SchedModel->getLatencyFactor());
+  Bot.ExpectedCount = std::max(Bot.ExpectedLatency, Bot.CurrCycle);
+  Bot.ExpectedCount = std::max(
+    Bot.getCriticalCount(),
+    Bot.ExpectedCount * SchedModel->getLatencyFactor());
+
+  balanceZones(Top, TopCand, Bot, BotCand);
+  balanceZones(Bot, BotCand, Top, TopCand);
+}
+
+void ConvergingScheduler::SchedCandidate::
+initResourceDelta(const ScheduleDAGMI *DAG,
+                  const TargetSchedModel *SchedModel) {
+  if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
+    return;
+
+  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+  for (TargetSchedModel::ProcResIter
+         PI = SchedModel->getWriteProcResBegin(SC),
+         PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+    if (PI->ProcResourceIdx == Policy.ReduceResIdx)
+      ResDelta.CritResources += PI->Cycles;
+    if (PI->ProcResourceIdx == Policy.DemandResIdx)
+      ResDelta.DemandedResources += PI->Cycles;
+  }
+}
+
+/// Return true if this heuristic determines order.
+static bool tryLess(unsigned TryVal, unsigned CandVal,
+                    ConvergingScheduler::SchedCandidate &TryCand,
+                    ConvergingScheduler::SchedCandidate &Cand,
+                    ConvergingScheduler::CandReason Reason) {
+  if (TryVal < CandVal) {
+    TryCand.Reason = Reason;
+    return true;
+  }
+  if (TryVal > CandVal) {
+    if (Cand.Reason > Reason)
+      Cand.Reason = Reason;
+    return true;
+  }
+  return false;
+}
+static bool tryGreater(unsigned TryVal, unsigned CandVal,
+                       ConvergingScheduler::SchedCandidate &TryCand,
+                       ConvergingScheduler::SchedCandidate &Cand,
+                       ConvergingScheduler::CandReason Reason) {
+  if (TryVal > CandVal) {
+    TryCand.Reason = Reason;
+    return true;
+  }
+  if (TryVal < CandVal) {
+    if (Cand.Reason > Reason)
+      Cand.Reason = Reason;
+    return true;
+  }
+  return false;
+}
+
+/// Apply a set of heursitics to a new candidate. Heuristics are currently
+/// hierarchical. This may be more efficient than a graduated cost model because
+/// we don't need to evaluate all aspects of the model for each node in the
+/// queue. But it's really done to make the heuristics easier to debug and
+/// statistically analyze.
+///
+/// \param Cand provides the policy and current best candidate.
+/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
+/// \param Zone describes the scheduled zone that we are extending.
+/// \param RPTracker describes reg pressure within the scheduled zone.
+/// \param TempTracker is a scratch pressure tracker to reuse in queries.
+void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
+                                       SchedCandidate &TryCand,
+                                       SchedBoundary &Zone,
+                                       const RegPressureTracker &RPTracker,
+                                       RegPressureTracker &TempTracker) {
+
+  // Always initialize TryCand's RPDelta.
+  TempTracker.getMaxPressureDelta(TryCand.SU->getInstr(), TryCand.RPDelta,
+                                  DAG->getRegionCriticalPSets(),
+                                  DAG->getRegPressure().MaxSetPressure);
+
+  // Initialize the candidate if needed.
+  if (!Cand.isValid()) {
+    TryCand.Reason = NodeOrder;
+    return;
+  }
+  // Avoid exceeding the target's limit.
+  if (tryLess(TryCand.RPDelta.Excess.UnitIncrease,
+              Cand.RPDelta.Excess.UnitIncrease, TryCand, Cand, SingleExcess))
+    return;
+  if (Cand.Reason == SingleExcess)
+    Cand.Reason = MultiPressure;
+
+  // Avoid increasing the max critical pressure in the scheduled region.
+  if (tryLess(TryCand.RPDelta.CriticalMax.UnitIncrease,
+              Cand.RPDelta.CriticalMax.UnitIncrease,
+              TryCand, Cand, SingleCritical))
+    return;
+  if (Cand.Reason == SingleCritical)
+    Cand.Reason = MultiPressure;
+
+  // Avoid critical resource consumption and balance the schedule.
+  TryCand.initResourceDelta(DAG, SchedModel);
+  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
+              TryCand, Cand, ResourceReduce))
+    return;
+  if (tryGreater(TryCand.ResDelta.DemandedResources,
+                 Cand.ResDelta.DemandedResources,
+                 TryCand, Cand, ResourceDemand))
+    return;
+
+  // Avoid serializing long latency dependence chains.
+  if (Cand.Policy.ReduceLatency) {
+    if (Zone.isTop()) {
+      if (Cand.SU->getDepth() * SchedModel->getLatencyFactor()
+          > Zone.ExpectedCount) {
+        if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
+                    TryCand, Cand, TopDepthReduce))
+          return;
+      }
+      if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
+                     TryCand, Cand, TopPathReduce))
+        return;
+    }
+    else {
+      if (Cand.SU->getHeight() * SchedModel->getLatencyFactor()
+          > Zone.ExpectedCount) {
+        if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
+                    TryCand, Cand, BotHeightReduce))
+          return;
+      }
+      if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
+                     TryCand, Cand, BotPathReduce))
+        return;
+    }
+  }
+
+  // Avoid increasing the max pressure of the entire region.
+  if (tryLess(TryCand.RPDelta.CurrentMax.UnitIncrease,
+              Cand.RPDelta.CurrentMax.UnitIncrease, TryCand, Cand, SingleMax))
+    return;
+  if (Cand.Reason == SingleMax)
+    Cand.Reason = MultiPressure;
+
+  // Prefer immediate defs/users of the last scheduled instruction. This is a
+  // nice pressure avoidance strategy that also conserves the processor's
+  // register renaming resources and keeps the machine code readable.
+  if (Zone.NextSUs.count(TryCand.SU) && !Zone.NextSUs.count(Cand.SU)) {
+    TryCand.Reason = NextDefUse;
+    return;
+  }
+  if (!Zone.NextSUs.count(TryCand.SU) && Zone.NextSUs.count(Cand.SU)) {
+    if (Cand.Reason > NextDefUse)
+      Cand.Reason = NextDefUse;
+    return;
+  }
+  // Fall through to original instruction order.
+  if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
+      || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
+    TryCand.Reason = NodeOrder;
+  }
 }
-#endif
 
 /// pickNodeFromQueue helper that returns true if the LHS reg pressure effect is
 /// more desirable than RHS from scheduling standpoint.
@@ -1098,109 +1543,144 @@ static bool compareRPDelta(const RegPressureDelta &LHS,
   // have UnitIncrease==0, so are neutral.
 
   // Avoid increasing the max critical pressure in the scheduled region.
-  if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease)
+  if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease) {
+    DEBUG(dbgs() << "RP excess top - bot: "
+          << (LHS.Excess.UnitIncrease - RHS.Excess.UnitIncrease) << '\n');
     return LHS.Excess.UnitIncrease < RHS.Excess.UnitIncrease;
-
+  }
   // Avoid increasing the max critical pressure in the scheduled region.
-  if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease)
+  if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease) {
+    DEBUG(dbgs() << "RP critical top - bot: "
+          << (LHS.CriticalMax.UnitIncrease - RHS.CriticalMax.UnitIncrease)
+          << '\n');
     return LHS.CriticalMax.UnitIncrease < RHS.CriticalMax.UnitIncrease;
-
+  }
   // Avoid increasing the max pressure of the entire region.
-  if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease)
+  if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease) {
+    DEBUG(dbgs() << "RP current top - bot: "
+          << (LHS.CurrentMax.UnitIncrease - RHS.CurrentMax.UnitIncrease)
+          << '\n');
     return LHS.CurrentMax.UnitIncrease < RHS.CurrentMax.UnitIncrease;
-
+  }
   return false;
 }
 
+#ifndef NDEBUG
+const char *ConvergingScheduler::getReasonStr(
+  ConvergingScheduler::CandReason Reason) {
+  switch (Reason) {
+  case NoCand:         return "NOCAND    ";
+  case SingleExcess:   return "REG-EXCESS";
+  case SingleCritical: return "REG-CRIT  ";
+  case SingleMax:      return "REG-MAX   ";
+  case MultiPressure:  return "REG-MULTI ";
+  case ResourceReduce: return "RES-REDUCE";
+  case ResourceDemand: return "RES-DEMAND";
+  case TopDepthReduce: return "TOP-DEPTH ";
+  case TopPathReduce:  return "TOP-PATH  ";
+  case BotHeightReduce:return "BOT-HEIGHT";
+  case BotPathReduce:  return "BOT-PATH  ";
+  case NextDefUse:     return "DEF-USE   ";
+  case NodeOrder:      return "ORDER     ";
+  };
+  llvm_unreachable("Unknown reason!");
+}
+
+void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand,
+                                         const SchedBoundary &Zone) {
+  const char *Label = getReasonStr(Cand.Reason);
+  PressureElement P;
+  unsigned ResIdx = 0;
+  unsigned Latency = 0;
+  switch (Cand.Reason) {
+  default:
+    break;
+  case SingleExcess:
+    P = Cand.RPDelta.Excess;
+    break;
+  case SingleCritical:
+    P = Cand.RPDelta.CriticalMax;
+    break;
+  case SingleMax:
+    P = Cand.RPDelta.CurrentMax;
+    break;
+  case ResourceReduce:
+    ResIdx = Cand.Policy.ReduceResIdx;
+    break;
+  case ResourceDemand:
+    ResIdx = Cand.Policy.DemandResIdx;
+    break;
+  case TopDepthReduce:
+    Latency = Cand.SU->getDepth();
+    break;
+  case TopPathReduce:
+    Latency = Cand.SU->getHeight();
+    break;
+  case BotHeightReduce:
+    Latency = Cand.SU->getHeight();
+    break;
+  case BotPathReduce:
+    Latency = Cand.SU->getDepth();
+    break;
+  }
+  dbgs() << Label << " " << Zone.Available.getName() << " ";
+  if (P.isValid())
+    dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease
+           << " ";
+  else
+    dbgs() << "     ";
+  if (ResIdx)
+    dbgs() << SchedModel->getProcResource(ResIdx)->Name << " ";
+  else
+    dbgs() << "        ";
+  if (Latency)
+    dbgs() << Latency << " cycles ";
+  else
+    dbgs() << "         ";
+  Cand.SU->dump(DAG);
+}
+#endif
+
 /// Pick the best candidate from the top queue.
 ///
 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
 /// DAG building. To adjust for the current scheduling location we need to
 /// maintain the number of vreg uses remaining to be top-scheduled.
-ConvergingScheduler::CandResult ConvergingScheduler::
-pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker,
-                  SchedCandidate &Candidate) {
+void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone,
+                                            const RegPressureTracker &RPTracker,
+                                            SchedCandidate &Cand) {
+  ReadyQueue &Q = Zone.Available;
+
   DEBUG(Q.dump());
 
   // getMaxPressureDelta temporarily modifies the tracker.
   RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
 
-  // BestSU remains NULL if no top candidates beat the best existing candidate.
-  CandResult FoundCandidate = NoCand;
   for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
-    RegPressureDelta RPDelta;
-    TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
-                                    DAG->getRegionCriticalPSets(),
-                                    DAG->getRegPressure().MaxSetPressure);
-
-    // Initialize the candidate if needed.
-    if (!Candidate.SU) {
-      Candidate.SU = *I;
-      Candidate.RPDelta = RPDelta;
-      FoundCandidate = NodeOrder;
-      continue;
-    }
-    // Avoid exceeding the target's limit.
-    if (RPDelta.Excess.UnitIncrease < Candidate.RPDelta.Excess.UnitIncrease) {
-      DEBUG(traceCandidate("ECAND", Q, *I, RPDelta.Excess));
-      Candidate.SU = *I;
-      Candidate.RPDelta = RPDelta;
-      FoundCandidate = SingleExcess;
-      continue;
-    }
-    if (RPDelta.Excess.UnitIncrease > Candidate.RPDelta.Excess.UnitIncrease)
-      continue;
-    if (FoundCandidate == SingleExcess)
-      FoundCandidate = MultiPressure;
-
-    // Avoid increasing the max critical pressure in the scheduled region.
-    if (RPDelta.CriticalMax.UnitIncrease
-        < Candidate.RPDelta.CriticalMax.UnitIncrease) {
-      DEBUG(traceCandidate("PCAND", Q, *I, RPDelta.CriticalMax));
-      Candidate.SU = *I;
-      Candidate.RPDelta = RPDelta;
-      FoundCandidate = SingleCritical;
-      continue;
-    }
-    if (RPDelta.CriticalMax.UnitIncrease
-        > Candidate.RPDelta.CriticalMax.UnitIncrease)
-      continue;
-    if (FoundCandidate == SingleCritical)
-      FoundCandidate = MultiPressure;
-
-    // Avoid increasing the max pressure of the entire region.
-    if (RPDelta.CurrentMax.UnitIncrease
-        < Candidate.RPDelta.CurrentMax.UnitIncrease) {
-      DEBUG(traceCandidate("MCAND", Q, *I, RPDelta.CurrentMax));
-      Candidate.SU = *I;
-      Candidate.RPDelta = RPDelta;
-      FoundCandidate = SingleMax;
-      continue;
-    }
-    if (RPDelta.CurrentMax.UnitIncrease
-        > Candidate.RPDelta.CurrentMax.UnitIncrease)
-      continue;
-    if (FoundCandidate == SingleMax)
-      FoundCandidate = MultiPressure;
-
-    // Fall through to original instruction order.
-    // Only consider node order if Candidate was chosen from this Q.
-    if (FoundCandidate == NoCand)
-      continue;
 
-    if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum)
-        || (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) {
-      DEBUG(traceCandidate("NCAND", Q, *I));
-      Candidate.SU = *I;
-      Candidate.RPDelta = RPDelta;
-      FoundCandidate = NodeOrder;
+    SchedCandidate TryCand(Cand.Policy);
+    TryCand.SU = *I;
+    tryCandidate(Cand, TryCand, Zone, RPTracker, TempTracker);
+    if (TryCand.Reason != NoCand) {
+      // Initialize resource delta if needed in case future heuristics query it.
+      if (TryCand.ResDelta == SchedResourceDelta())
+        TryCand.initResourceDelta(DAG, SchedModel);
+      Cand.setBest(TryCand);
+      DEBUG(traceCandidate(Cand, Zone));
     }
+    TryCand.SU = *I;
   }
-  return FoundCandidate;
+}
+
+static void tracePick(const ConvergingScheduler::SchedCandidate &Cand,
+                      bool IsTop) {
+  DEBUG(dbgs() << "Pick " << (IsTop ? "top" : "bot")
+        << " SU(" << Cand.SU->NodeNum << ") "
+        << ConvergingScheduler::getReasonStr(Cand.Reason) << '\n');
 }
 
 /// Pick the best candidate node from either the top or bottom queue.
-SUnit *ConvergingScheduler::pickNodeBidrectional(bool &IsTopNode) {
+SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) {
   // Schedule as far as possible in the direction of no choice. This is most
   // efficient, but also provides the best heuristics for CriticalPSets.
   if (SUnit *SU = Bot.pickOnlyChoice()) {
@@ -1211,11 +1691,14 @@ SUnit *ConvergingScheduler::pickNodeBidrectional(bool &IsTopNode) {
     IsTopNode = true;
     return SU;
   }
-  SchedCandidate BotCand;
+  CandPolicy NoPolicy;
+  SchedCandidate BotCand(NoPolicy);
+  SchedCandidate TopCand(NoPolicy);
+  checkResourceLimits(TopCand, BotCand);
+
   // Prefer bottom scheduling when heuristics are silent.
-  CandResult BotResult = pickNodeFromQueue(Bot.Available,
-                                           DAG->getBotRPTracker(), BotCand);
-  assert(BotResult != NoCand && "failed to find the first candidate");
+  pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
+  assert(BotCand.Reason != NoCand && "failed to find the first candidate");
 
   // If either Q has a single candidate that provides the least increase in
   // Excess pressure, we can immediately schedule from that Q.
@@ -1224,37 +1707,41 @@ SUnit *ConvergingScheduler::pickNodeBidrectional(bool &IsTopNode) {
   // affects picking from either Q. If scheduling in one direction must
   // increase pressure for one of the excess PSets, then schedule in that
   // direction first to provide more freedom in the other direction.
-  if (BotResult == SingleExcess || BotResult == SingleCritical) {
+  if (BotCand.Reason == SingleExcess || BotCand.Reason == SingleCritical) {
     IsTopNode = false;
+    tracePick(BotCand, IsTopNode);
     return BotCand.SU;
   }
   // Check if the top Q has a better candidate.
-  SchedCandidate TopCand;
-  CandResult TopResult = pickNodeFromQueue(Top.Available,
-                                           DAG->getTopRPTracker(), TopCand);
-  assert(TopResult != NoCand && "failed to find the first candidate");
+  pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
+  assert(TopCand.Reason != NoCand && "failed to find the first candidate");
 
-  if (TopResult == SingleExcess || TopResult == SingleCritical) {
-    IsTopNode = true;
-    return TopCand.SU;
-  }
   // If either Q has a single candidate that minimizes pressure above the
   // original region's pressure pick it.
-  if (BotResult == SingleMax) {
+  if (TopCand.Reason <= SingleMax || BotCand.Reason <= SingleMax) {
+    if (TopCand.Reason < BotCand.Reason) {
+      IsTopNode = true;
+      tracePick(TopCand, IsTopNode);
+      return TopCand.SU;
+    }
     IsTopNode = false;
+    tracePick(BotCand, IsTopNode);
     return BotCand.SU;
   }
-  if (TopResult == SingleMax) {
+  // Check for a salient pressure difference and pick the best from either side.
+  if (compareRPDelta(TopCand.RPDelta, BotCand.RPDelta)) {
     IsTopNode = true;
+    tracePick(TopCand, IsTopNode);
     return TopCand.SU;
   }
-  // Check for a salient pressure difference and pick the best from either side.
-  if (compareRPDelta(TopCand.RPDelta, BotCand.RPDelta)) {
+  // Otherwise prefer the bottom candidate, in node order if all else failed.
+  if (TopCand.Reason < BotCand.Reason) {
     IsTopNode = true;
+    tracePick(TopCand, IsTopNode);
     return TopCand.SU;
   }
-  // Otherwise prefer the bottom candidate in node order.
   IsTopNode = false;
+  tracePick(BotCand, IsTopNode);
   return BotCand.SU;
 }
 
@@ -1266,33 +1753,34 @@ SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {
     return NULL;
   }
   SUnit *SU;
-  if (ForceTopDown) {
-    SU = Top.pickOnlyChoice();
-    if (!SU) {
-      SchedCandidate TopCand;
-      CandResult TopResult =
-        pickNodeFromQueue(Top.Available, DAG->getTopRPTracker(), TopCand);
-      assert(TopResult != NoCand && "failed to find the first candidate");
-      (void)TopResult;
-      SU = TopCand.SU;
+  do {
+    if (ForceTopDown) {
+      SU = Top.pickOnlyChoice();
+      if (!SU) {
+        CandPolicy NoPolicy;
+        SchedCandidate TopCand(NoPolicy);
+        pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
+        assert(TopCand.Reason != NoCand && "failed to find the first candidate");
+        SU = TopCand.SU;
+      }
+      IsTopNode = true;
     }
-    IsTopNode = true;
-  }
-  else if (ForceBottomUp) {
-    SU = Bot.pickOnlyChoice();
-    if (!SU) {
-      SchedCandidate BotCand;
-      CandResult BotResult =
-        pickNodeFromQueue(Bot.Available, DAG->getBotRPTracker(), BotCand);
-      assert(BotResult != NoCand && "failed to find the first candidate");
-      (void)BotResult;
-      SU = BotCand.SU;
+    else if (ForceBottomUp) {
+      SU = Bot.pickOnlyChoice();
+      if (!SU) {
+        CandPolicy NoPolicy;
+        SchedCandidate BotCand(NoPolicy);
+        pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
+        assert(BotCand.Reason != NoCand && "failed to find the first candidate");
+        SU = BotCand.SU;
+      }
+      IsTopNode = false;
     }
-    IsTopNode = false;
-  }
-  else {
-    SU = pickNodeBidrectional(IsTopNode);
-  }
+    else {
+      SU = pickNodeBidirectional(IsTopNode);
+    }
+  } while (SU->isScheduled);
+
   if (SU->isTopReady())
     Top.removeReady(SU);
   if (SU->isBottomReady())
@@ -1331,6 +1819,86 @@ ConvergingSchedRegistry("converge", "Standard converging scheduler.",
                         createConvergingSched);
 
 //===----------------------------------------------------------------------===//
+// ILP Scheduler. Currently for experimental analysis of heuristics.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// \brief Order nodes by the ILP metric.
+struct ILPOrder {
+  ScheduleDAGILP *ILP;
+  bool MaximizeILP;
+
+  ILPOrder(ScheduleDAGILP *ilp, bool MaxILP): ILP(ilp), MaximizeILP(MaxILP) {}
+
+  /// \brief Apply a less-than relation on node priority.
+  bool operator()(const SUnit *A, const SUnit *B) const {
+    // Return true if A comes after B in the Q.
+    if (MaximizeILP)
+      return ILP->getILP(A) < ILP->getILP(B);
+    else
+      return ILP->getILP(A) > ILP->getILP(B);
+  }
+};
+
+/// \brief Schedule based on the ILP metric.
+class ILPScheduler : public MachineSchedStrategy {
+  ScheduleDAGILP ILP;
+  ILPOrder Cmp;
+
+  std::vector<SUnit*> ReadyQ;
+public:
+  ILPScheduler(bool MaximizeILP)
+  : ILP(/*BottomUp=*/true), Cmp(&ILP, MaximizeILP) {}
+
+  virtual void initialize(ScheduleDAGMI *DAG) {
+    ReadyQ.clear();
+    ILP.resize(DAG->SUnits.size());
+  }
+
+  virtual void registerRoots() {
+    for (std::vector<SUnit*>::const_iterator
+           I = ReadyQ.begin(), E = ReadyQ.end(); I != E; ++I) {
+      ILP.computeILP(*I);
+    }
+  }
+
+  /// Implement MachineSchedStrategy interface.
+  /// -----------------------------------------
+
+  virtual SUnit *pickNode(bool &IsTopNode) {
+    if (ReadyQ.empty()) return NULL;
+    pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
+    SUnit *SU = ReadyQ.back();
+    ReadyQ.pop_back();
+    IsTopNode = false;
+    DEBUG(dbgs() << "*** Scheduling " << *SU->getInstr()
+          << " ILP: " << ILP.getILP(SU) << '\n');
+    return SU;
+  }
+
+  virtual void schedNode(SUnit *, bool) {}
+
+  virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ }
+
+  virtual void releaseBottomNode(SUnit *SU) {
+    ReadyQ.push_back(SU);
+    std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
+  }
+};
+} // namespace
+
+static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
+  return new ScheduleDAGMI(C, new ILPScheduler(true));
+}
+static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
+  return new ScheduleDAGMI(C, new ILPScheduler(false));
+}
+static MachineSchedRegistry ILPMaxRegistry(
+  "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
+static MachineSchedRegistry ILPMinRegistry(
+  "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
+
+//===----------------------------------------------------------------------===//
 // Machine Instruction Shuffler for Correctness Testing
 //===----------------------------------------------------------------------===//
 
diff --git a/contrib/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm/lib/CodeGen/MachineSink.cpp
index bc383cb..b117f8c 100644
--- a/contrib/llvm/lib/CodeGen/MachineSink.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineSink.cpp
@@ -49,7 +49,6 @@ namespace {
     MachineDominatorTree *DT;   // Machine dominator tree
     MachineLoopInfo *LI;
     AliasAnalysis *AA;
-    BitVector AllocatableSet;   // Which physregs are allocatable?
 
     // Remember which edges have been considered for breaking.
     SmallSet<std::pair<MachineBasicBlock*,MachineBasicBlock*>, 8>
@@ -229,7 +228,6 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
   DT = &getAnalysis<MachineDominatorTree>();
   LI = &getAnalysis<MachineLoopInfo>();
   AA = &getAnalysis<AliasAnalysis>();
-  AllocatableSet = TRI->getAllocatableSet(MF);
 
   bool EverMadeChange = false;
 
diff --git a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
index 1a3aa60..9686b04 100644
--- a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
@@ -14,9 +14,10 @@
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/PostOrderIterator.h"
@@ -50,9 +51,11 @@ bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) {
   MF = &Func;
   TII = MF->getTarget().getInstrInfo();
   TRI = MF->getTarget().getRegisterInfo();
-  ItinData = MF->getTarget().getInstrItineraryData();
   MRI = &MF->getRegInfo();
   Loops = &getAnalysis<MachineLoopInfo>();
+  const TargetSubtargetInfo &ST =
+    MF->getTarget().getSubtarget<TargetSubtargetInfo>();
+  SchedModel.init(*ST.getSchedModel(), &ST, TII);
   BlockInfo.resize(MF->getNumBlockIDs());
   return false;
 }
@@ -674,7 +677,7 @@ computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) {
     const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
     // Ignore dependencies outside the current trace.
     const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()];
-    if (!DefTBI.hasValidDepth() || DefTBI.Head != TBI.Head)
+    if (!DefTBI.isEarlierInSameTrace(TBI))
       continue;
     unsigned Len = LIR.Height + Cycles[DefMI].Depth;
     MaxLen = std::max(MaxLen, Len);
@@ -737,16 +740,15 @@ computeInstrDepths(const MachineBasicBlock *MBB) {
         const TraceBlockInfo&DepTBI =
           BlockInfo[Dep.DefMI->getParent()->getNumber()];
         // Ignore dependencies from outside the current trace.
-        if (!DepTBI.hasValidDepth() || DepTBI.Head != TBI.Head)
+        if (!DepTBI.isEarlierInSameTrace(TBI))
           continue;
         assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency");
         unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth;
         // Add latency if DefMI is a real instruction. Transients get latency 0.
         if (!Dep.DefMI->isTransient())
-          DepCycle += MTM.TII->computeOperandLatency(MTM.ItinData,
-                                                     Dep.DefMI, Dep.DefOp,
-                                                     UseMI, Dep.UseOp,
-                                                     /* FindMin = */ false);
+          DepCycle += MTM.SchedModel
+            .computeOperandLatency(Dep.DefMI, Dep.DefOp, UseMI, Dep.UseOp,
+                                   /* FindMin = */ false);
         Cycle = std::max(Cycle, DepCycle);
       }
       // Remember the instruction depth.
@@ -769,7 +771,7 @@ computeInstrDepths(const MachineBasicBlock *MBB) {
 // Height is the issue height computed from virtual register dependencies alone.
 static unsigned updatePhysDepsUpwards(const MachineInstr *MI, unsigned Height,
                                       SparseSet<LiveRegUnit> &RegUnits,
-                                      const InstrItineraryData *ItinData,
+                                      const TargetSchedModel &SchedModel,
                                       const TargetInstrInfo *TII,
                                       const TargetRegisterInfo *TRI) {
   SmallVector<unsigned, 8> ReadOps;
@@ -792,14 +794,10 @@ static unsigned updatePhysDepsUpwards(const MachineInstr *MI, unsigned Height,
       unsigned DepHeight = I->Cycle;
       if (!MI->isTransient()) {
         // We may not know the UseMI of this dependency, if it came from the
-        // live-in list.
-        if (I->MI)
-          DepHeight += TII->computeOperandLatency(ItinData,
-                                                  MI, MO.getOperandNo(),
-                                                  I->MI, I->Op);
-        else
-          // No UseMI. Just use the MI latency instead.
-          DepHeight += TII->getInstrLatency(ItinData, MI);
+        // live-in list. SchedModel can handle a NULL UseMI.
+        DepHeight += SchedModel
+          .computeOperandLatency(MI, MO.getOperandNo(), I->MI, I->Op,
+                                 /* FindMin = */ false);
       }
       Height = std::max(Height, DepHeight);
       // This regunit is dead above MI.
@@ -832,12 +830,12 @@ typedef DenseMap<const MachineInstr *, unsigned> MIHeightMap;
 static bool pushDepHeight(const DataDep &Dep,
                           const MachineInstr *UseMI, unsigned UseHeight,
                           MIHeightMap &Heights,
-                          const InstrItineraryData *ItinData,
+                          const TargetSchedModel &SchedModel,
                           const TargetInstrInfo *TII) {
   // Adjust height by Dep.DefMI latency.
   if (!Dep.DefMI->isTransient())
-    UseHeight += TII->computeOperandLatency(ItinData, Dep.DefMI, Dep.DefOp,
-                                            UseMI, Dep.UseOp);
+    UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp,
+                                                  UseMI, Dep.UseOp, false);
 
   // Update Heights[DefMI] to be the maximum height seen.
   MIHeightMap::iterator I;
@@ -852,14 +850,14 @@ static bool pushDepHeight(const DataDep &Dep,
   return false;
 }
 
-/// Assuming that DefMI was used by Trace.back(), add it to the live-in lists
-/// of all the blocks in Trace. Stop when reaching the block that contains
-/// DefMI.
+/// Assuming that the virtual register defined by DefMI:DefOp was used by
+/// Trace.back(), add it to the live-in lists of all the blocks in Trace. Stop
+/// when reaching the block that contains DefMI.
 void MachineTraceMetrics::Ensemble::
-addLiveIns(const MachineInstr *DefMI,
+addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
            ArrayRef<const MachineBasicBlock*> Trace) {
   assert(!Trace.empty() && "Trace should contain at least one block");
-  unsigned Reg = DefMI->getOperand(0).getReg();
+  unsigned Reg = DefMI->getOperand(DefOp).getReg();
   assert(TargetRegisterInfo::isVirtualRegister(Reg));
   const MachineBasicBlock *DefMBB = DefMI->getParent();
 
@@ -951,8 +949,8 @@ computeInstrHeights(const MachineBasicBlock *MBB) {
           unsigned Height = TBI.Succ ? Cycles.lookup(PHI).Height : 0;
           DEBUG(dbgs() << "pred\t" << Height << '\t' << *PHI);
           if (pushDepHeight(Deps.front(), PHI, Height,
-                            Heights, MTM.ItinData, MTM.TII))
-            addLiveIns(Deps.front().DefMI, Stack);
+                            Heights, MTM.SchedModel, MTM.TII))
+            addLiveIns(Deps.front().DefMI, Deps.front().DefOp, Stack);
         }
       }
     }
@@ -980,12 +978,12 @@ computeInstrHeights(const MachineBasicBlock *MBB) {
       // There may also be regunit dependencies to include in the height.
       if (HasPhysRegs)
         Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits,
-                                      MTM.ItinData, MTM.TII, MTM.TRI);
+                                      MTM.SchedModel, MTM.TII, MTM.TRI);
 
       // Update the required height of any virtual registers read by MI.
       for (unsigned i = 0, e = Deps.size(); i != e; ++i)
-        if (pushDepHeight(Deps[i], MI, Cycle, Heights, MTM.ItinData, MTM.TII))
-          addLiveIns(Deps[i].DefMI, Stack);
+        if (pushDepHeight(Deps[i], MI, Cycle, Heights, MTM.SchedModel, MTM.TII))
+          addLiveIns(Deps[i].DefMI, Deps[i].DefOp, Stack);
 
       InstrCycles &MICycles = Cycles[MI];
       MICycles.Height = Cycle;
@@ -1054,10 +1052,8 @@ MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr *PHI) const {
   unsigned DepCycle = getInstrCycles(Dep.DefMI).Depth;
   // Add latency if DefMI is a real instruction. Transients get latency 0.
   if (!Dep.DefMI->isTransient())
-    DepCycle += TE.MTM.TII->computeOperandLatency(TE.MTM.ItinData,
-                                                  Dep.DefMI, Dep.DefOp,
-                                                  PHI, Dep.UseOp,
-                                                  /* FindMin = */ false);
+    DepCycle += TE.MTM.SchedModel
+      .computeOperandLatency(Dep.DefMI, Dep.DefOp, PHI, Dep.UseOp, false);
   return DepCycle;
 }
 
@@ -1068,9 +1064,8 @@ unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
   unsigned Instrs = TBI.InstrDepth;
   if (Bottom)
     Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;
-  if (const MCSchedModel *Model = TE.MTM.ItinData->SchedModel)
-    if (Model->IssueWidth != 0)
-      return Instrs / Model->IssueWidth;
+  if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
+    Instrs /= IW;
   // Assume issue width 1 without a schedule model.
   return Instrs;
 }
@@ -1080,9 +1075,8 @@ getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks) const {
   unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;
   for (unsigned i = 0, e = Extrablocks.size(); i != e; ++i)
     Instrs += TE.MTM.getResources(Extrablocks[i])->InstrCount;
-  if (const MCSchedModel *Model = TE.MTM.ItinData->SchedModel)
-    if (Model->IssueWidth != 0)
-      return Instrs / Model->IssueWidth;
+  if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
+    Instrs /= IW;
   // Assume issue width 1 without a schedule model.
   return Instrs;
 }
diff --git a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.h b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.h
index c5b86f3..460730b 100644
--- a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.h
+++ b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.h
@@ -50,6 +50,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/TargetSchedule.h"
 
 namespace llvm {
 
@@ -67,9 +68,9 @@ class MachineTraceMetrics : public MachineFunctionPass {
   const MachineFunction *MF;
   const TargetInstrInfo *TII;
   const TargetRegisterInfo *TRI;
-  const InstrItineraryData *ItinData;
   const MachineRegisterInfo *MRI;
   const MachineLoopInfo *Loops;
+  TargetSchedModel SchedModel;
 
 public:
   class Ensemble;
@@ -164,6 +165,14 @@ public:
     /// Invalidate height resources when a block below this one has changed.
     void invalidateHeight() { InstrHeight = ~0u; HasValidInstrHeights = false; }
 
+    /// Determine if this block belongs to the same trace as TBI and comes
+    /// before it in the trace.
+    /// Also returns true when TBI == this.
+    bool isEarlierInSameTrace(const TraceBlockInfo &TBI) const {
+      return hasValidDepth() && TBI.hasValidDepth() &&
+        Head == TBI.Head && InstrDepth <= TBI.InstrDepth;
+    }
+
     // Data-dependency-related information. Per-instruction depth and height
     // are computed from data dependencies in the current trace, using
     // itinerary data.
@@ -270,7 +279,7 @@ public:
     unsigned computeCrossBlockCriticalPath(const TraceBlockInfo&);
     void computeInstrDepths(const MachineBasicBlock*);
     void computeInstrHeights(const MachineBasicBlock*);
-    void addLiveIns(const MachineInstr *DefMI,
+    void addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
                     ArrayRef<const MachineBasicBlock*> Trace);
 
   protected:
diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
index f745b41..69a3ae8 100644
--- a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -23,8 +23,9 @@
 // the verifier errors.
 //===----------------------------------------------------------------------===//
 
+#include "llvm/BasicBlock.h"
+#include "llvm/InlineAsm.h"
 #include "llvm/Instructions.h"
-#include "llvm/Function.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/LiveVariables.h"
 #include "llvm/CodeGen/LiveStackAnalysis.h"
@@ -73,11 +74,12 @@ namespace {
     typedef SmallVector<const uint32_t*, 4> RegMaskVector;
     typedef DenseSet<unsigned> RegSet;
     typedef DenseMap<unsigned, const MachineInstr*> RegMap;
+    typedef SmallPtrSet<const MachineBasicBlock*, 8> BlockSet;
 
     const MachineInstr *FirstTerminator;
+    BlockSet FunctionBlocks;
 
     BitVector regsReserved;
-    BitVector regsAllocatable;
     RegSet regsLive;
     RegVector regsDefined, regsDead, regsKilled;
     RegMaskVector regMasks;
@@ -117,6 +119,9 @@ namespace {
       // block. This set is disjoint from regsLiveOut.
       RegSet vregsRequired;
 
+      // Set versions of block's predecessor and successor lists.
+      BlockSet Preds, Succs;
+
       BBInfo() : reachable(false) {}
 
       // Add register to vregsPassed if it belongs there. Return true if
@@ -180,7 +185,7 @@ namespace {
     }
 
     bool isAllocatable(unsigned Reg) {
-      return Reg < regsAllocatable.size() && regsAllocatable.test(Reg);
+      return Reg < TRI->getNumRegs() && MRI->isAllocatable(Reg);
     }
 
     // Analysis information if available
@@ -208,6 +213,8 @@ namespace {
     void report(const char *msg, const MachineBasicBlock *MBB,
                 const LiveInterval &LI);
 
+    void verifyInlineAsm(const MachineInstr *MI);
+
     void checkLiveness(const MachineOperand *MO, unsigned MONum);
     void markReachable(const MachineBasicBlock *MBB);
     void calcRegsPassed();
@@ -352,7 +359,7 @@ void MachineVerifier::report(const char *msg, const MachineFunction *MF) {
     MF->print(*OS, Indexes);
   }
   *OS << "*** Bad machine code: " << msg << " ***\n"
-      << "- function:    " << MF->getFunction()->getName() << "\n";
+      << "- function:    " << MF->getName() << "\n";
 }
 
 void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) {
@@ -360,7 +367,7 @@ void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) {
   report(msg, MBB->getParent());
   *OS << "- basic block: BB#" << MBB->getNumber()
       << ' ' << MBB->getName()
-      << " (" << (void*)MBB << ')';
+      << " (" << (const void*)MBB << ')';
   if (Indexes)
     *OS << " [" << Indexes->getMBBStartIdx(MBB)
         << ';' <<  Indexes->getMBBEndIdx(MBB) << ')';
@@ -419,7 +426,7 @@ void MachineVerifier::markReachable(const MachineBasicBlock *MBB) {
 
 void MachineVerifier::visitMachineFunctionBefore() {
   lastIndex = SlotIndex();
-  regsReserved = TRI->getReservedRegs(*MF);
+  regsReserved = MRI->getReservedRegs();
 
   // A sub-register of a reserved register is also reserved
   for (int Reg = regsReserved.find_first(); Reg>=0;
@@ -431,9 +438,23 @@ void MachineVerifier::visitMachineFunctionBefore() {
     }
   }
 
-  regsAllocatable = TRI->getAllocatableSet(*MF);
-
   markReachable(&MF->front());
+
+  // Build a set of the basic blocks in the function.
+  FunctionBlocks.clear();
+  for (MachineFunction::const_iterator
+       I = MF->begin(), E = MF->end(); I != E; ++I) {
+    FunctionBlocks.insert(I);
+    BBInfo &MInfo = MBBInfoMap[I];
+
+    MInfo.Preds.insert(I->pred_begin(), I->pred_end());
+    if (MInfo.Preds.size() != I->pred_size())
+      report("MBB has duplicate entries in its predecessor list.", I);
+
+    MInfo.Succs.insert(I->succ_begin(), I->succ_end());
+    if (MInfo.Succs.size() != I->succ_size())
+      report("MBB has duplicate entries in its successor list.", I);
+  }
 }
 
 // Does iterator point to a and b as the first two elements?
@@ -470,6 +491,25 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
        E = MBB->succ_end(); I != E; ++I) {
     if ((*I)->isLandingPad())
       LandingPadSuccs.insert(*I);
+    if (!FunctionBlocks.count(*I))
+      report("MBB has successor that isn't part of the function.", MBB);
+    if (!MBBInfoMap[*I].Preds.count(MBB)) {
+      report("Inconsistent CFG", MBB);
+      *OS << "MBB is not in the predecessor list of the successor BB#"
+          << (*I)->getNumber() << ".\n";
+    }
+  }
+
+  // Check the predecessor list.
+  for (MachineBasicBlock::const_pred_iterator I = MBB->pred_begin(),
+       E = MBB->pred_end(); I != E; ++I) {
+    if (!FunctionBlocks.count(*I))
+      report("MBB has predecessor that isn't part of the function.", MBB);
+    if (!MBBInfoMap[*I].Succs.count(MBB)) {
+      report("Inconsistent CFG", MBB);
+      *OS << "MBB is not in the successor list of the predecessor BB#"
+          << (*I)->getNumber() << ".\n";
+    }
   }
 
   const MCAsmInfo *AsmInfo = TM->getMCAsmInfo();
@@ -540,7 +580,15 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
       ++MBBI;
       if (MBBI == MF->end()) {
         report("MBB conditionally falls through out of function!", MBB);
-      } if (MBB->succ_size() != 2) {
+      } if (MBB->succ_size() == 1) {
+        // A conditional branch with only one successor is weird, but allowed.
+        if (&*MBBI != TBB)
+          report("MBB exits via conditional branch/fall-through but only has "
+                 "one CFG successor!", MBB);
+        else if (TBB != *MBB->succ_begin())
+          report("MBB exits via conditional branch/fall-through but the CFG "
+                 "successor don't match the actual successor!", MBB);
+      } else if (MBB->succ_size() != 2) {
         report("MBB exits via conditional branch/fall-through but doesn't have "
                "exactly two CFG successors!", MBB);
       } else if (!matchPair(MBB->succ_begin(), TBB, MBBI)) {
@@ -560,7 +608,15 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
     } else if (TBB && FBB) {
       // Block conditionally branches somewhere, otherwise branches
       // somewhere else.
-      if (MBB->succ_size() != 2) {
+      if (MBB->succ_size() == 1) {
+        // A conditional branch with only one successor is weird, but allowed.
+        if (FBB != TBB)
+          report("MBB exits via conditional branch/branch through but only has "
+                 "one CFG successor!", MBB);
+        else if (TBB != *MBB->succ_begin())
+          report("MBB exits via conditional branch/branch through but the CFG "
+                 "successor don't match the actual successor!", MBB);
+      } else if (MBB->succ_size() != 2) {
         report("MBB exits via conditional branch/branch but doesn't have "
                "exactly two CFG successors!", MBB);
       } else if (!matchPair(MBB->succ_begin(), TBB, FBB)) {
@@ -639,6 +695,50 @@ void MachineVerifier::visitMachineBundleBefore(const MachineInstr *MI) {
   }
 }
 
+// The operands on an INLINEASM instruction must follow a template.
+// Verify that the flag operands make sense.
+void MachineVerifier::verifyInlineAsm(const MachineInstr *MI) {
+  // The first two operands on INLINEASM are the asm string and global flags.
+  if (MI->getNumOperands() < 2) {
+    report("Too few operands on inline asm", MI);
+    return;
+  }
+  if (!MI->getOperand(0).isSymbol())
+    report("Asm string must be an external symbol", MI);
+  if (!MI->getOperand(1).isImm())
+    report("Asm flags must be an immediate", MI);
+  // Allowed flags are Extra_HasSideEffects = 1, Extra_IsAlignStack = 2,
+  // Extra_AsmDialect = 4, Extra_MayLoad = 8, and Extra_MayStore = 16.
+  if (!isUInt<5>(MI->getOperand(1).getImm()))
+    report("Unknown asm flags", &MI->getOperand(1), 1);
+
+  assert(InlineAsm::MIOp_FirstOperand == 2 && "Asm format changed");
+
+  unsigned OpNo = InlineAsm::MIOp_FirstOperand;
+  unsigned NumOps;
+  for (unsigned e = MI->getNumOperands(); OpNo < e; OpNo += NumOps) {
+    const MachineOperand &MO = MI->getOperand(OpNo);
+    // There may be implicit ops after the fixed operands.
+    if (!MO.isImm())
+      break;
+    NumOps = 1 + InlineAsm::getNumOperandRegisters(MO.getImm());
+  }
+
+  if (OpNo > MI->getNumOperands())
+    report("Missing operands in last group", MI);
+
+  // An optional MDNode follows the groups.
+  if (OpNo < MI->getNumOperands() && MI->getOperand(OpNo).isMetadata())
+    ++OpNo;
+
+  // All trailing operands must be implicit registers.
+  for (unsigned e = MI->getNumOperands(); OpNo < e; ++OpNo) {
+    const MachineOperand &MO = MI->getOperand(OpNo);
+    if (!MO.isReg() || !MO.isImplicit())
+      report("Expected implicit register after groups", &MO, OpNo);
+  }
+}
+
 void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
   const MCInstrDesc &MCID = MI->getDesc();
   if (MI->getNumOperands() < MCID.getNumOperands()) {
@@ -647,6 +747,10 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
         << MI->getNumExplicitOperands() << " given.\n";
   }
 
+  // Check the tied operands.
+  if (MI->isInlineAsm())
+    verifyInlineAsm(MI);
+
   // Check the MachineMemOperands for basic consistency.
   for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
        E = MI->memoperands_end(); I != E; ++I) {
@@ -702,6 +806,17 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
       if (MO->isImplicit())
         report("Explicit operand marked as implicit", MO, MONum);
     }
+
+    int TiedTo = MCID.getOperandConstraint(MONum, MCOI::TIED_TO);
+    if (TiedTo != -1) {
+      if (!MO->isReg())
+        report("Tied use must be a register", MO, MONum);
+      else if (!MO->isTied())
+        report("Operand should be tied", MO, MONum);
+      else if (unsigned(TiedTo) != MI->findTiedOperandIdx(MONum))
+        report("Tied def doesn't match MCInstrDesc", MO, MONum);
+    } else if (MO->isReg() && MO->isTied())
+      report("Explicit operand should not be tied", MO, MONum);
   } else {
     // ARM adds %reg0 operands to indicate predicates. We'll allow that.
     if (MO->isReg() && !MO->isImplicit() && !MI->isVariadic() && MO->getReg())
@@ -716,6 +831,28 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
     if (MRI->tracksLiveness() && !MI->isDebugValue())
       checkLiveness(MO, MONum);
 
+    // Verify the consistency of tied operands.
+    if (MO->isTied()) {
+      unsigned OtherIdx = MI->findTiedOperandIdx(MONum);
+      const MachineOperand &OtherMO = MI->getOperand(OtherIdx);
+      if (!OtherMO.isReg())
+        report("Must be tied to a register", MO, MONum);
+      if (!OtherMO.isTied())
+        report("Missing tie flags on tied operand", MO, MONum);
+      if (MI->findTiedOperandIdx(OtherIdx) != MONum)
+        report("Inconsistent tie links", MO, MONum);
+      if (MONum < MCID.getNumDefs()) {
+        if (OtherIdx < MCID.getNumOperands()) {
+          if (-1 == MCID.getOperandConstraint(OtherIdx, MCOI::TIED_TO))
+            report("Explicit def tied to explicit use without tie constraint",
+                   MO, MONum);
+        } else {
+          if (!OtherMO.isImplicit())
+            report("Explicit def should be tied to implicit use", MO, MONum);
+        }
+      }
+    }
+
     // Verify two-address constraints after leaving SSA form.
     unsigned DefIdx;
     if (!MRI->isSSA() && MO->isUse() &&
diff --git a/contrib/llvm/lib/CodeGen/Passes.cpp b/contrib/llvm/lib/CodeGen/Passes.cpp
index cfa3eec..4ea21d4 100644
--- a/contrib/llvm/lib/CodeGen/Passes.cpp
+++ b/contrib/llvm/lib/CodeGen/Passes.cpp
@@ -49,8 +49,8 @@ static cl::opt<bool> DisableSSC("disable-ssc", cl::Hidden,
     cl::desc("Disable Stack Slot Coloring"));
 static cl::opt<bool> DisableMachineDCE("disable-machine-dce", cl::Hidden,
     cl::desc("Disable Machine Dead Code Elimination"));
-static cl::opt<bool> EnableEarlyIfConversion("enable-early-ifcvt", cl::Hidden,
-    cl::desc("Enable Early If-conversion"));
+static cl::opt<bool> DisableEarlyIfConversion("disable-early-ifcvt", cl::Hidden,
+    cl::desc("Disable Early If-conversion"));
 static cl::opt<bool> DisableMachineLICM("disable-machine-licm", cl::Hidden,
     cl::desc("Disable Machine LICM"));
 static cl::opt<bool> DisableMachineCSE("disable-machine-cse", cl::Hidden,
@@ -161,7 +161,7 @@ static AnalysisID overridePass(AnalysisID StandardID, AnalysisID TargetID) {
     return applyDisable(TargetID, DisableMachineDCE);
 
   if (StandardID == &EarlyIfConverterID)
-    return applyDisable(TargetID, !EnableEarlyIfConversion);
+    return applyDisable(TargetID, DisableEarlyIfConversion);
 
   if (StandardID == &MachineLICMID)
     return applyDisable(TargetID, DisableMachineLICM);
@@ -447,8 +447,8 @@ void TargetPassConfig::addMachinePasses() {
     const PassInfo *TPI = PR->getPassInfo(PrintMachineInstrs.getValue());
     const PassInfo *IPI = PR->getPassInfo(StringRef("print-machineinstrs"));
     assert (TPI && IPI && "Pass ID not registered!");
-    const char *TID = (char *)(TPI->getTypeInfo());
-    const char *IID = (char *)(IPI->getTypeInfo());
+    const char *TID = (const char *)(TPI->getTypeInfo());
+    const char *IID = (const char *)(IPI->getTypeInfo());
     insertPass(TID, IID);
   }
 
@@ -456,7 +456,8 @@ void TargetPassConfig::addMachinePasses() {
   printAndVerify("After Instruction Selection");
 
   // Expand pseudo-instructions emitted by ISel.
-  addPass(&ExpandISelPseudosID);
+  if (addPass(&ExpandISelPseudosID))
+    printAndVerify("After ExpandISelPseudos");
 
   // Add passes that optimize machine instructions in SSA form.
   if (getOptLevel() != CodeGenOpt::None) {
@@ -528,6 +529,10 @@ void TargetPassConfig::addMachineSSAOptimization() {
   // instructions dead.
   addPass(&OptimizePHIsID);
 
+  // This pass merges large allocas. StackSlotColoring is a different pass
+  // which merges spill slots.
+  addPass(&StackColoringID);
+
   // If the target requests it, assign local variables to stack slots relative
   // to one another and simplify frame index references where possible.
   addPass(&LocalStackSlotAllocationID);
diff --git a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index 9099862..a795ac8 100644
--- a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -527,6 +527,11 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
         SeenMoveImm = true;
       } else {
         Changed |= optimizeExtInstr(MI, MBB, LocalMIs);
+        // optimizeExtInstr might have created new instructions after MI
+        // and before the already incremented MII. Adjust MII so that the
+        // next iteration sees the new instructions.
+        MII = MI;
+        ++MII;
         if (SeenMoveImm)
           Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
       }
diff --git a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
index 7449ff5..d57bc73 100644
--- a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
+++ b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
@@ -240,6 +240,7 @@ void SchedulePostRATDList::exitRegion() {
   ScheduleDAGInstrs::exitRegion();
 }
 
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 /// dumpSchedule - dump the scheduled Sequence.
 void SchedulePostRATDList::dumpSchedule() const {
   for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
@@ -249,6 +250,7 @@ void SchedulePostRATDList::dumpSchedule() const {
       dbgs() << "**** NOOP ****\n";
   }
 }
+#endif
 
 bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
   TII = Fn.getTarget().getInstrInfo();
@@ -298,7 +300,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
       static int bbcnt = 0;
       if (bbcnt++ % DebugDiv != DebugMod)
         continue;
-      dbgs() << "*** DEBUG scheduling " << Fn.getFunction()->getName()
+      dbgs() << "*** DEBUG scheduling " << Fn.getName()
              << ":BB#" << MBB->getNumber() << " ***\n";
     }
 #endif
@@ -488,7 +490,6 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
   DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n');
 
   BitVector killedRegs(TRI->getNumRegs());
-  BitVector ReservedRegs = TRI->getReservedRegs(MF);
 
   StartBlockForKills(MBB);
 
@@ -529,7 +530,7 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
       MachineOperand &MO = MI->getOperand(i);
       if (!MO.isReg() || !MO.isUse()) continue;
       unsigned Reg = MO.getReg();
-      if ((Reg == 0) || ReservedRegs.test(Reg)) continue;
+      if ((Reg == 0) || MRI.isReserved(Reg)) continue;
 
       bool kill = false;
       if (!killedRegs.test(Reg)) {
@@ -564,7 +565,7 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
       MachineOperand &MO = MI->getOperand(i);
       if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
       unsigned Reg = MO.getReg();
-      if ((Reg == 0) || ReservedRegs.test(Reg)) continue;
+      if ((Reg == 0) || MRI.isReserved(Reg)) continue;
 
       LiveRegs.set(Reg);
 
diff --git a/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
index 34d075c..e4e18c3 100644
--- a/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -137,8 +137,7 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
 bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &MF) {
 
   DEBUG(dbgs() << "********** PROCESS IMPLICIT DEFS **********\n"
-               << "********** Function: "
-               << ((Value*)MF.getFunction())->getName() << '\n');
+               << "********** Function: " << MF.getName() << '\n');
 
   bool Changed = false;
 
diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index c791ffb..77554d6 100644
--- a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -96,7 +96,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
   placeCSRSpillsAndRestores(Fn);
 
   // Add the code to save and restore the callee saved registers
-  if (!F->hasFnAttr(Attribute::Naked))
+  if (!F->getFnAttributes().hasAttribute(Attributes::Naked))
     insertCSRSpillsAndRestores(Fn);
 
   // Allow the target machine to make final modifications to the function
@@ -111,7 +111,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
   // called functions.  Because of this, calculateCalleeSavedRegisters()
   // must be called before this function in order to set the AdjustsStack
   // and MaxCallFrameSize variables.
-  if (!F->hasFnAttr(Attribute::Naked))
+  if (!F->getFnAttributes().hasAttribute(Attributes::Naked))
     insertPrologEpilogCode(Fn);
 
   // Replace all MO_FrameIndex operands with physical register references
@@ -221,13 +221,13 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
     return;
 
   // In Naked functions we aren't going to save any registers.
-  if (Fn.getFunction()->hasFnAttr(Attribute::Naked))
+  if (Fn.getFunction()->getFnAttributes().hasAttribute(Attributes::Naked))
     return;
 
   std::vector<CalleeSavedInfo> CSI;
   for (unsigned i = 0; CSRegs[i]; ++i) {
     unsigned Reg = CSRegs[i];
-    if (Fn.getRegInfo().isPhysRegOrOverlapUsed(Reg)) {
+    if (Fn.getRegInfo().isPhysRegUsed(Reg)) {
       // If the reg is modified, save it!
       CSI.push_back(CalleeSavedInfo(Reg));
     }
diff --git a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp
index 3a03807..8a49609 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp
@@ -20,7 +20,6 @@
 #include "VirtRegMap.h"
 #include "LiveRegMatrix.h"
 #include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Function.h"
 #include "llvm/PassAnalysisSupport.h"
 #include "llvm/CodeGen/CalcSpillWeights.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
@@ -273,7 +272,7 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,
 bool RABasic::runOnMachineFunction(MachineFunction &mf) {
   DEBUG(dbgs() << "********** BASIC REGISTER ALLOCATION **********\n"
                << "********** Function: "
-               << ((Value*)mf.getFunction())->getName() << '\n');
+               << mf.getName() << '\n');
 
   MF = &mf;
   RegAllocBase::init(getAnalysis<VirtRegMap>(),
diff --git a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
index 6b3a48e..8892216 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -113,9 +113,11 @@ namespace {
     // PhysRegState - One of the RegState enums, or a virtreg.
     std::vector<unsigned> PhysRegState;
 
-    // UsedInInstr - BitVector of physregs that are used in the current
-    // instruction, and so cannot be allocated.
-    BitVector UsedInInstr;
+    typedef SparseSet<unsigned> UsedInInstrSet;
+
+    // UsedInInstr - Set of physregs that are used in the current instruction,
+    // and so cannot be allocated.
+    UsedInInstrSet UsedInInstr;
 
     // SkippedInstrs - Descriptors of instructions whose clobber list was
     // ignored because all registers were spilled. It is still necessary to
@@ -173,7 +175,7 @@ namespace {
                                        unsigned VirtReg, unsigned Hint);
     LiveRegMap::iterator reloadVirtReg(MachineInstr *MI, unsigned OpNum,
                                        unsigned VirtReg, unsigned Hint);
-    void spillAll(MachineInstr *MI);
+    void spillAll(MachineBasicBlock::iterator MI);
     bool setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg);
     void addRetOperands(MachineBasicBlock *MBB);
   };
@@ -312,7 +314,7 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
 }
 
 /// spillAll - Spill all dirty virtregs without killing them.
-void RAFast::spillAll(MachineInstr *MI) {
+void RAFast::spillAll(MachineBasicBlock::iterator MI) {
   if (LiveVirtRegs.empty()) return;
   isBulkSpilling = true;
   // The LiveRegMap is keyed by an unsigned (the virtreg number), so the order
@@ -340,7 +342,7 @@ void RAFast::usePhysReg(MachineOperand &MO) {
     PhysRegState[PhysReg] = regFree;
     // Fall through
   case regFree:
-    UsedInInstr.set(PhysReg);
+    UsedInInstr.insert(PhysReg);
     MO.setIsKill();
     return;
   default:
@@ -360,13 +362,13 @@ void RAFast::usePhysReg(MachineOperand &MO) {
              "Instruction is not using a subregister of a reserved register");
       // Leave the superregister in the working set.
       PhysRegState[Alias] = regFree;
-      UsedInInstr.set(Alias);
+      UsedInInstr.insert(Alias);
       MO.getParent()->addRegisterKilled(Alias, TRI, true);
       return;
     case regFree:
       if (TRI->isSuperRegister(PhysReg, Alias)) {
         // Leave the superregister in the working set.
-        UsedInInstr.set(Alias);
+        UsedInInstr.insert(Alias);
         MO.getParent()->addRegisterKilled(Alias, TRI, true);
         return;
       }
@@ -380,7 +382,7 @@ void RAFast::usePhysReg(MachineOperand &MO) {
 
   // All aliases are disabled, bring register into working set.
   PhysRegState[PhysReg] = regFree;
-  UsedInInstr.set(PhysReg);
+  UsedInInstr.insert(PhysReg);
   MO.setIsKill();
 }
 
@@ -389,7 +391,7 @@ void RAFast::usePhysReg(MachineOperand &MO) {
 /// reserved instead of allocated.
 void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg,
                            RegState NewState) {
-  UsedInInstr.set(PhysReg);
+  UsedInInstr.insert(PhysReg);
   switch (unsigned VirtReg = PhysRegState[PhysReg]) {
   case regDisabled:
     break;
@@ -429,7 +431,7 @@ void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg,
 // can be allocated directly.
 // Returns spillImpossible when PhysReg or an alias can't be spilled.
 unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
-  if (UsedInInstr.test(PhysReg)) {
+  if (UsedInInstr.count(PhysReg)) {
     DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is already used in instr.\n");
     return spillImpossible;
   }
@@ -454,7 +456,7 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
   unsigned Cost = 0;
   for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
     unsigned Alias = *AI;
-    if (UsedInInstr.test(Alias))
+    if (UsedInInstr.count(Alias))
       return spillImpossible;
     switch (unsigned VirtReg = PhysRegState[Alias]) {
     case regDisabled:
@@ -509,7 +511,7 @@ RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI,
 
   // Ignore invalid hints.
   if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) ||
-               !RC->contains(Hint) || !RegClassInfo.isAllocatable(Hint)))
+               !RC->contains(Hint) || !MRI->isAllocatable(Hint)))
     Hint = 0;
 
   // Take hint when possible.
@@ -530,7 +532,7 @@ RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI,
   // First try to find a completely free register.
   for (ArrayRef<unsigned>::iterator I = AO.begin(), E = AO.end(); I != E; ++I) {
     unsigned PhysReg = *I;
-    if (PhysRegState[PhysReg] == regFree && !UsedInInstr.test(PhysReg)) {
+    if (PhysRegState[PhysReg] == regFree && !UsedInInstr.count(PhysReg)) {
       assignVirtToPhysReg(*LRI, PhysReg);
       return LRI;
     }
@@ -596,7 +598,7 @@ RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum,
   LRI->LastUse = MI;
   LRI->LastOpNum = OpNum;
   LRI->Dirty = true;
-  UsedInInstr.set(LRI->PhysReg);
+  UsedInInstr.insert(LRI->PhysReg);
   return LRI;
 }
 
@@ -646,7 +648,7 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum,
   assert(LRI->PhysReg && "Register not assigned");
   LRI->LastUse = MI;
   LRI->LastOpNum = OpNum;
-  UsedInInstr.set(LRI->PhysReg);
+  UsedInInstr.insert(LRI->PhysReg);
   return LRI;
 }
 
@@ -708,7 +710,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
     unsigned Reg = MO.getReg();
     if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
     for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
-      UsedInInstr.set(*AI);
+      UsedInInstr.insert(*AI);
       if (ThroughRegs.count(PhysRegState[*AI]))
         definePhysReg(MI, *AI, regFree);
     }
@@ -756,7 +758,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
   }
 
   // Restore UsedInInstr to a state usable for allocating normal virtual uses.
-  UsedInInstr.reset();
+  UsedInInstr.clear();
   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
     MachineOperand &MO = MI->getOperand(i);
     if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue;
@@ -764,12 +766,12 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
     if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
     DEBUG(dbgs() << "\tSetting " << PrintReg(Reg, TRI)
                  << " as used in instr\n");
-    UsedInInstr.set(Reg);
+    UsedInInstr.insert(Reg);
   }
 
   // Also mark PartialDefs as used to avoid reallocation.
   for (unsigned i = 0, e = PartialDefs.size(); i != e; ++i)
-    UsedInInstr.set(PartialDefs[i]);
+    UsedInInstr.insert(PartialDefs[i]);
 }
 
 /// addRetOperand - ensure that a return instruction has an operand for each
@@ -838,7 +840,7 @@ void RAFast::AllocateBasicBlock() {
   // Add live-in registers as live.
   for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
          E = MBB->livein_end(); I != E; ++I)
-    if (RegClassInfo.isAllocatable(*I))
+    if (MRI->isAllocatable(*I))
       definePhysReg(MII, *I, regReserved);
 
   SmallVector<unsigned, 8> VirtDead;
@@ -942,7 +944,7 @@ void RAFast::AllocateBasicBlock() {
     }
 
     // Track registers used by instruction.
-    UsedInInstr.reset();
+    UsedInInstr.clear();
 
     // First scan.
     // Mark physreg uses and early clobbers as used.
@@ -954,6 +956,11 @@ void RAFast::AllocateBasicBlock() {
     bool hasPhysDefs = false;
     for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
       MachineOperand &MO = MI->getOperand(i);
+      // Make sure MRI knows about registers clobbered by regmasks.
+      if (MO.isRegMask()) {
+        MRI->addPhysRegsUsedFromRegMask(MO.getRegMask());
+        continue;
+      }
       if (!MO.isReg()) continue;
       unsigned Reg = MO.getReg();
       if (!Reg) continue;
@@ -970,7 +977,7 @@ void RAFast::AllocateBasicBlock() {
         }
         continue;
       }
-      if (!RegClassInfo.isAllocatable(Reg)) continue;
+      if (!MRI->isAllocatable(Reg)) continue;
       if (MO.isUse()) {
         usePhysReg(MO);
       } else if (MO.isEarlyClobber()) {
@@ -1016,11 +1023,13 @@ void RAFast::AllocateBasicBlock() {
       }
     }
 
-    MRI->addPhysRegsUsed(UsedInInstr);
+    for (UsedInInstrSet::iterator
+         I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I)
+      MRI->setPhysRegUsed(*I);
 
     // Track registers defined by instruction - early clobbers and tied uses at
     // this point.
-    UsedInInstr.reset();
+    UsedInInstr.clear();
     if (hasEarlyClobbers) {
       for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
         MachineOperand &MO = MI->getOperand(i);
@@ -1030,7 +1039,7 @@ void RAFast::AllocateBasicBlock() {
         // Look for physreg defs and tied uses.
         if (!MO.isDef() && !MI->isRegTiedToDefOperand(i)) continue;
         for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
-          UsedInInstr.set(*AI);
+          UsedInInstr.insert(*AI);
       }
     }
 
@@ -1058,7 +1067,7 @@ void RAFast::AllocateBasicBlock() {
       unsigned Reg = MO.getReg();
 
       if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
-        if (!RegClassInfo.isAllocatable(Reg)) continue;
+        if (!MRI->isAllocatable(Reg)) continue;
         definePhysReg(MI, Reg, (MO.isImplicit() || MO.isDead()) ?
                                regFree : regReserved);
         continue;
@@ -1080,7 +1089,9 @@ void RAFast::AllocateBasicBlock() {
       killVirtReg(VirtDead[i]);
     VirtDead.clear();
 
-    MRI->addPhysRegsUsed(UsedInInstr);
+    for (UsedInInstrSet::iterator
+         I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I)
+      MRI->setPhysRegUsed(*I);
 
     if (CopyDst && CopyDst == CopySrc && CopyDstSub == CopySrcSub) {
       DEBUG(dbgs() << "-- coalescing: " << *MI);
@@ -1110,8 +1121,7 @@ void RAFast::AllocateBasicBlock() {
 ///
 bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
   DEBUG(dbgs() << "********** FAST REGISTER ALLOCATION **********\n"
-               << "********** Function: "
-               << ((Value*)Fn.getFunction())->getName() << '\n');
+               << "********** Function: " << Fn.getName() << '\n');
   MF = &Fn;
   MRI = &MF->getRegInfo();
   TM = &Fn.getTarget();
@@ -1119,7 +1129,8 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
   TII = TM->getInstrInfo();
   MRI->freezeReservedRegs(Fn);
   RegClassInfo.runOnMachineFunction(Fn);
-  UsedInInstr.resize(TRI->getNumRegs());
+  UsedInInstr.clear();
+  UsedInInstr.setUniverse(TRI->getNumRegs());
 
   assert(!MRI->isSSA() && "regalloc requires leaving SSA");
 
diff --git a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 6ac5428..06f69c1e 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -24,7 +24,6 @@
 #include "VirtRegMap.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Function.h"
 #include "llvm/PassAnalysisSupport.h"
 #include "llvm/CodeGen/CalcSpillWeights.h"
 #include "llvm/CodeGen/EdgeBundles.h"
@@ -331,9 +330,9 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addPreserved<SlotIndexes>();
   AU.addRequired<LiveDebugVariables>();
   AU.addPreserved<LiveDebugVariables>();
-  AU.addRequired<CalculateSpillWeights>();
   AU.addRequired<LiveStacks>();
   AU.addPreserved<LiveStacks>();
+  AU.addRequired<CalculateSpillWeights>();
   AU.addRequired<MachineDominatorTree>();
   AU.addPreserved<MachineDominatorTree>();
   AU.addRequired<MachineLoopInfo>();
@@ -509,7 +508,7 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,
 ///
 /// @param VirtReg Live range that is about to be assigned.
 /// @param PhysReg Desired register for assignment.
-/// @prarm IsHint  True when PhysReg is VirtReg's preferred register.
+/// @param IsHint  True when PhysReg is VirtReg's preferred register.
 /// @param MaxCost Only look for cheaper candidates and update with new cost
 ///                when returning true.
 /// @returns True when interference can be evicted cheaper than MaxCost.
@@ -1746,8 +1745,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
 
 bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
   DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
-               << "********** Function: "
-               << ((Value*)mf.getFunction())->getName() << '\n');
+               << "********** Function: " << mf.getName() << '\n');
 
   MF = &mf;
   if (VerifyEnabled)
diff --git a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
index d0db26b..02ebce7 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
@@ -118,7 +118,6 @@ private:
   typedef std::vector<AllowedSet> AllowedSetMap;
   typedef std::pair<unsigned, unsigned> RegPair;
   typedef std::map<RegPair, PBQP::PBQPNum> CoalesceMap;
-  typedef std::vector<PBQP::Graph::NodeItr> NodeVector;
   typedef std::set<unsigned> RegSet;
 
 
@@ -192,7 +191,6 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
                                                 const MachineLoopInfo *loopInfo,
                                                 const RegSet &vregs) {
 
-  typedef std::vector<const LiveInterval*> LIVector;
   LiveIntervals *LIS = const_cast<LiveIntervals*>(lis);
   MachineRegisterInfo *mri = &mf->getRegInfo();
   const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo();
@@ -209,8 +207,6 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
     mri->setPhysRegUsed(Reg);
   }
 
-  BitVector reservedRegs = tri->getReservedRegs(*mf);
-
   // Iterate over vregs.
   for (RegSet::const_iterator vregItr = vregs.begin(), vregEnd = vregs.end();
        vregItr != vregEnd; ++vregItr) {
@@ -219,7 +215,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
     LiveInterval *vregLI = &LIS->getInterval(vreg);
 
     // Record any overlaps with regmask operands.
-    BitVector regMaskOverlaps(tri->getNumRegs());
+    BitVector regMaskOverlaps;
     LIS->checkRegMaskInterference(*vregLI, regMaskOverlaps);
 
     // Compute an initial allowed set for the current vreg.
@@ -228,7 +224,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
     ArrayRef<uint16_t> rawOrder = trc->getRawAllocationOrder(*mf);
     for (unsigned i = 0; i != rawOrder.size(); ++i) {
       unsigned preg = rawOrder[i];
-      if (reservedRegs.test(preg))
+      if (mri->isReserved(preg))
         continue;
 
       // vregLI crosses a regmask operand that clobbers preg.
@@ -358,7 +354,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build(
                                                    loopInfo->getLoopDepth(mbb));
 
       if (cp.isPhys()) {
-        if (!lis->isAllocatable(dst)) {
+        if (!mf->getRegInfo().isAllocatable(dst)) {
           continue;
         }
 
@@ -433,6 +429,7 @@ void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
   au.addRequired<SlotIndexes>();
   au.addPreserved<SlotIndexes>();
   au.addRequired<LiveIntervals>();
+  au.addPreserved<LiveIntervals>();
   //au.addRequiredID(SplitCriticalEdgesID);
   if (customPassID)
     au.addRequiredID(*customPassID);
@@ -444,6 +441,7 @@ void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
   au.addRequired<MachineLoopInfo>();
   au.addPreserved<MachineLoopInfo>();
   au.addRequired<VirtRegMap>();
+  au.addPreserved<VirtRegMap>();
   MachineFunctionPass::getAnalysisUsage(au);
 }
 
@@ -556,7 +554,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
 
   mri->freezeReservedRegs(MF);
 
-  DEBUG(dbgs() << "PBQP Register Allocating for " << mf->getFunction()->getName() << "\n");
+  DEBUG(dbgs() << "PBQP Register Allocating for " << mf->getName() << "\n");
 
   // Allocator main loop:
   //
@@ -570,11 +568,12 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
   // Find the vreg intervals in need of allocation.
   findVRegIntervalsToAlloc();
 
+#ifndef NDEBUG
   const Function* func = mf->getFunction();
   std::string fqn =
     func->getParent()->getModuleIdentifier() + "." +
     func->getName().str();
-  (void)fqn;
+#endif
 
   // If there are non-empty intervals allocate them using pbqp.
   if (!vregsToAlloc.empty()) {
diff --git a/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp b/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp
index 652bc30..805d235 100644
--- a/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp
@@ -15,8 +15,9 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "regalloc"
-#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
@@ -57,10 +58,11 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
   CalleeSaved = CSR;
 
   // Different reserved registers?
-  BitVector RR = TRI->getReservedRegs(*MF);
-  if (RR != Reserved)
+  const BitVector &RR = MF->getRegInfo().getReservedRegs();
+  if (Reserved.size() != RR.size() || RR != Reserved) {
     Update = true;
-  Reserved = RR;
+    Reserved = RR;
+  }
 
   // Invalidate cached information from previous function.
   if (Update)
diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 9906334..2538f10 100644
--- a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -55,6 +55,8 @@ STATISTIC(numCommutes , "Number of instruction commuting performed");
 STATISTIC(numExtends  , "Number of copies extended");
 STATISTIC(NumReMats   , "Number of instructions re-materialized");
 STATISTIC(NumInflated , "Number of register classes inflated");
+STATISTIC(NumLaneConflicts, "Number of dead lane conflicts tested");
+STATISTIC(NumLaneResolves,  "Number of dead lane conflicts resolved");
 
 static cl::opt<bool>
 EnableJoining("join-liveintervals",
@@ -123,6 +125,9 @@ namespace {
     /// can use this information below to update aliases.
     bool joinIntervals(CoalescerPair &CP);
 
+    /// Attempt joining two virtual registers. Return true on success.
+    bool joinVirtRegs(CoalescerPair &CP);
+
     /// Attempt joining with a reserved physreg.
     bool joinReservedPhysReg(CoalescerPair &CP);
 
@@ -193,12 +198,6 @@ INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing",
 
 char RegisterCoalescer::ID = 0;
 
-static unsigned compose(const TargetRegisterInfo &tri, unsigned a, unsigned b) {
-  if (!a) return b;
-  if (!b) return a;
-  return tri.composeSubRegIndices(a, b);
-}
-
 static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI,
                         unsigned &Src, unsigned &Dst,
                         unsigned &SrcSub, unsigned &DstSub) {
@@ -209,8 +208,8 @@ static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI,
     SrcSub = MI->getOperand(1).getSubReg();
   } else if (MI->isSubregToReg()) {
     Dst = MI->getOperand(0).getReg();
-    DstSub = compose(tri, MI->getOperand(0).getSubReg(),
-                     MI->getOperand(3).getImm());
+    DstSub = tri.composeSubRegIndices(MI->getOperand(0).getSubReg(),
+                                      MI->getOperand(3).getImm());
     Src = MI->getOperand(2).getReg();
     SrcSub = MI->getOperand(2).getSubReg();
   } else
@@ -349,7 +348,8 @@ bool CoalescerPair::isCoalescable(const MachineInstr *MI) const {
     if (DstReg != Dst)
       return false;
     // Registers match, do the subregisters line up?
-    return compose(TRI, SrcIdx, SrcSub) == compose(TRI, DstIdx, DstSub);
+    return TRI.composeSubRegIndices(SrcIdx, SrcSub) ==
+           TRI.composeSubRegIndices(DstIdx, DstSub);
   }
 }
 
@@ -425,7 +425,8 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
   // If AValNo is defined as a copy from IntB, we can potentially process this.
   // Get the instruction that defines this value number.
   MachineInstr *ACopyMI = LIS->getInstructionFromIndex(AValNo->def);
-  if (!CP.isCoalescable(ACopyMI))
+  // Don't allow any partial copies, even if isCoalescable() allows them.
+  if (!CP.isCoalescable(ACopyMI) || !ACopyMI->isFullCopy())
     return false;
 
   // Get the LiveRange in IntB that this value number starts with.
@@ -583,7 +584,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
 
   MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
   unsigned NewReg = NewDstMO.getReg();
-  if (NewReg != IntB.reg || !NewDstMO.isKill())
+  if (NewReg != IntB.reg || !LiveRangeQuery(IntB, AValNo->def).isKill())
     return false;
 
   // Make sure there are no other definitions of IntB that would reach the
@@ -849,8 +850,17 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
   // Update LiveDebugVariables.
   LDV->renameRegister(SrcReg, DstReg, SubIdx);
 
+  SmallPtrSet<MachineInstr*, 8> Visited;
   for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(SrcReg);
        MachineInstr *UseMI = I.skipInstruction();) {
+    // Each instruction can only be rewritten once because sub-register
+    // composition is not always idempotent. When SrcReg != DstReg, rewriting
+    // the UseMI operands removes them from the SrcReg use-def chain, but when
+    // SrcReg is DstReg we could encounter UseMI twice if it has multiple
+    // operands mentioning the virtual register.
+    if (SrcReg == DstReg && !Visited.insert(UseMI))
+      continue;
+
     SmallVector<unsigned,8> Ops;
     bool Reads, Writes;
     tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops);
@@ -890,7 +900,7 @@ bool RegisterCoalescer::canJoinPhys(CoalescerPair &CP) {
   /// Always join simple intervals that are defined by a single copy from a
   /// reserved register. This doesn't increase register pressure, so it is
   /// always beneficial.
-  if (!RegClassInfo.isReserved(CP.getDstReg())) {
+  if (!MRI->isReserved(CP.getDstReg())) {
     DEBUG(dbgs() << "\tCan only merge into reserved registers.\n");
     return false;
   }
@@ -1065,7 +1075,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
 /// Attempt joining with a reserved physreg.
 bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
   assert(CP.isPhys() && "Must be a physreg copy");
-  assert(RegClassInfo.isReserved(CP.getDstReg()) && "Not a reserved register");
+  assert(MRI->isReserved(CP.getDstReg()) && "Not a reserved register");
   LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
   DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS
                << '\n');
@@ -1102,347 +1112,797 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
   return true;
 }
 
-/// ComputeUltimateVN - Assuming we are going to join two live intervals,
-/// compute what the resultant value numbers for each value in the input two
-/// ranges will be.  This is complicated by copies between the two which can
-/// and will commonly cause multiple value numbers to be merged into one.
-///
-/// VN is the value number that we're trying to resolve.  InstDefiningValue
-/// keeps track of the new InstDefiningValue assignment for the result
-/// LiveInterval.  ThisFromOther/OtherFromThis are sets that keep track of
-/// whether a value in this or other is a copy from the opposite set.
-/// ThisValNoAssignments/OtherValNoAssignments keep track of value #'s that have
-/// already been assigned.
-///
-/// ThisFromOther[x] - If x is defined as a copy from the other interval, this
-/// contains the value number the copy is from.
-///
-static unsigned ComputeUltimateVN(VNInfo *VNI,
-                                  SmallVector<VNInfo*, 16> &NewVNInfo,
-                                  DenseMap<VNInfo*, VNInfo*> &ThisFromOther,
-                                  DenseMap<VNInfo*, VNInfo*> &OtherFromThis,
-                                  SmallVector<int, 16> &ThisValNoAssignments,
-                                  SmallVector<int, 16> &OtherValNoAssignments) {
-  unsigned VN = VNI->id;
-
-  // If the VN has already been computed, just return it.
-  if (ThisValNoAssignments[VN] >= 0)
-    return ThisValNoAssignments[VN];
-  assert(ThisValNoAssignments[VN] != -2 && "Cyclic value numbers");
-
-  // If this val is not a copy from the other val, then it must be a new value
-  // number in the destination.
-  DenseMap<VNInfo*, VNInfo*>::iterator I = ThisFromOther.find(VNI);
-  if (I == ThisFromOther.end()) {
-    NewVNInfo.push_back(VNI);
-    return ThisValNoAssignments[VN] = NewVNInfo.size()-1;
-  }
-  VNInfo *OtherValNo = I->second;
-
-  // Otherwise, this *is* a copy from the RHS.  If the other side has already
-  // been computed, return it.
-  if (OtherValNoAssignments[OtherValNo->id] >= 0)
-    return ThisValNoAssignments[VN] = OtherValNoAssignments[OtherValNo->id];
-
-  // Mark this value number as currently being computed, then ask what the
-  // ultimate value # of the other value is.
-  ThisValNoAssignments[VN] = -2;
-  unsigned UltimateVN =
-    ComputeUltimateVN(OtherValNo, NewVNInfo, OtherFromThis, ThisFromOther,
-                      OtherValNoAssignments, ThisValNoAssignments);
-  return ThisValNoAssignments[VN] = UltimateVN;
-}
+//===----------------------------------------------------------------------===//
+//                 Interference checking and interval joining
+//===----------------------------------------------------------------------===//
+//
+// In the easiest case, the two live ranges being joined are disjoint, and
+// there is no interference to consider. It is quite common, though, to have
+// overlapping live ranges, and we need to check if the interference can be
+// resolved.
+//
+// The live range of a single SSA value forms a sub-tree of the dominator tree.
+// This means that two SSA values overlap if and only if the def of one value
+// is contained in the live range of the other value. As a special case, the
+// overlapping values can be defined at the same index.
+//
+// The interference from an overlapping def can be resolved in these cases:
+//
+// 1. Coalescable copies. The value is defined by a copy that would become an
+//    identity copy after joining SrcReg and DstReg. The copy instruction will
+//    be removed, and the value will be merged with the source value.
+//
+//    There can be several copies back and forth, causing many values to be
+//    merged into one. We compute a list of ultimate values in the joined live
+//    range as well as a mappings from the old value numbers.
+//
+// 2. IMPLICIT_DEF. This instruction is only inserted to ensure all PHI
+//    predecessors have a live out value. It doesn't cause real interference,
+//    and can be merged into the value it overlaps. Like a coalescable copy, it
+//    can be erased after joining.
+//
+// 3. Copy of external value. The overlapping def may be a copy of a value that
+//    is already in the other register. This is like a coalescable copy, but
+//    the live range of the source register must be trimmed after erasing the
+//    copy instruction:
+//
+//      %src = COPY %ext
+//      %dst = COPY %ext  <-- Remove this COPY, trim the live range of %ext.
+//
+// 4. Clobbering undefined lanes. Vector registers are sometimes built by
+//    defining one lane at a time:
+//
+//      %dst:ssub0<def,read-undef> = FOO
+//      %src = BAR
+//      %dst:ssub1<def> = COPY %src
+//
+//    The live range of %src overlaps the %dst value defined by FOO, but
+//    merging %src into %dst:ssub1 is only going to clobber the ssub1 lane
+//    which was undef anyway.
+//
+//    The value mapping is more complicated in this case. The final live range
+//    will have different value numbers for both FOO and BAR, but there is no
+//    simple mapping from old to new values. It may even be necessary to add
+//    new PHI values.
+//
+// 5. Clobbering dead lanes. A def may clobber a lane of a vector register that
+//    is live, but never read. This can happen because we don't compute
+//    individual live ranges per lane.
+//
+//      %dst<def> = FOO
+//      %src = BAR
+//      %dst:ssub1<def> = COPY %src
+//
+//    This kind of interference is only resolved locally. If the clobbered
+//    lane value escapes the block, the join is aborted.
 
+namespace {
+/// Track information about values in a single virtual register about to be
+/// joined. Objects of this class are always created in pairs - one for each
+/// side of the CoalescerPair.
+class JoinVals {
+  LiveInterval &LI;
+
+  // Location of this register in the final joined register.
+  // Either CP.DstIdx or CP.SrcIdx.
+  unsigned SubIdx;
+
+  // Values that will be present in the final live range.
+  SmallVectorImpl<VNInfo*> &NewVNInfo;
+
+  const CoalescerPair &CP;
+  LiveIntervals *LIS;
+  SlotIndexes *Indexes;
+  const TargetRegisterInfo *TRI;
+
+  // Value number assignments. Maps value numbers in LI to entries in NewVNInfo.
+  // This is suitable for passing to LiveInterval::join().
+  SmallVector<int, 8> Assignments;
+
+  // Conflict resolution for overlapping values.
+  enum ConflictResolution {
+    // No overlap, simply keep this value.
+    CR_Keep,
+
+    // Merge this value into OtherVNI and erase the defining instruction.
+    // Used for IMPLICIT_DEF, coalescable copies, and copies from external
+    // values.
+    CR_Erase,
+
+    // Merge this value into OtherVNI but keep the defining instruction.
+    // This is for the special case where OtherVNI is defined by the same
+    // instruction.
+    CR_Merge,
+
+    // Keep this value, and have it replace OtherVNI where possible. This
+    // complicates value mapping since OtherVNI maps to two different values
+    // before and after this def.
+    // Used when clobbering undefined or dead lanes.
+    CR_Replace,
+
+    // Unresolved conflict. Visit later when all values have been mapped.
+    CR_Unresolved,
+
+    // Unresolvable conflict. Abort the join.
+    CR_Impossible
+  };
 
-// Find out if we have something like
-// A = X
-// B = X
-// if so, we can pretend this is actually
-// A = X
-// B = A
-// which allows us to coalesce A and B.
-// VNI is the definition of B. LR is the life range of A that includes
-// the slot just before B. If we return true, we add "B = X" to DupCopies.
-// This implies that A dominates B.
-static bool RegistersDefinedFromSameValue(LiveIntervals &li,
-                                          const TargetRegisterInfo &tri,
-                                          CoalescerPair &CP,
-                                          VNInfo *VNI,
-                                          VNInfo *OtherVNI,
-                                     SmallVector<MachineInstr*, 8> &DupCopies) {
-  // FIXME: This is very conservative. For example, we don't handle
-  // physical registers.
-
-  MachineInstr *MI = li.getInstructionFromIndex(VNI->def);
-
-  if (!MI || CP.isPartial() || CP.isPhys())
-    return false;
+  // Per-value info for LI. The lane bit masks are all relative to the final
+  // joined register, so they can be compared directly between SrcReg and
+  // DstReg.
+  struct Val {
+    ConflictResolution Resolution;
 
-  unsigned A = CP.getDstReg();
-  if (!TargetRegisterInfo::isVirtualRegister(A))
-    return false;
+    // Lanes written by this def, 0 for unanalyzed values.
+    unsigned WriteLanes;
 
-  unsigned B = CP.getSrcReg();
-  if (!TargetRegisterInfo::isVirtualRegister(B))
-    return false;
+    // Lanes with defined values in this register. Other lanes are undef and
+    // safe to clobber.
+    unsigned ValidLanes;
 
-  MachineInstr *OtherMI = li.getInstructionFromIndex(OtherVNI->def);
-  if (!OtherMI)
-    return false;
+    // Value in LI being redefined by this def.
+    VNInfo *RedefVNI;
 
-  if (MI->isImplicitDef()) {
-    DupCopies.push_back(MI);
-    return true;
-  } else {
-    if (!MI->isFullCopy())
-      return false;
-    unsigned Src = MI->getOperand(1).getReg();
-    if (!TargetRegisterInfo::isVirtualRegister(Src))
-      return false;
-    if (!OtherMI->isFullCopy())
-      return false;
-    unsigned OtherSrc = OtherMI->getOperand(1).getReg();
-    if (!TargetRegisterInfo::isVirtualRegister(OtherSrc))
-      return false;
+    // Value in the other live range that overlaps this def, if any.
+    VNInfo *OtherVNI;
 
-    if (Src != OtherSrc)
-      return false;
+    // Is this value an IMPLICIT_DEF?
+    bool IsImplicitDef;
 
-    // If the copies use two different value numbers of X, we cannot merge
-    // A and B.
-    LiveInterval &SrcInt = li.getInterval(Src);
-    // getVNInfoBefore returns NULL for undef copies. In this case, the
-    // optimization is still safe.
-    if (SrcInt.getVNInfoBefore(OtherVNI->def) !=
-        SrcInt.getVNInfoBefore(VNI->def))
-      return false;
+    // True when the live range of this value will be pruned because of an
+    // overlapping CR_Replace value in the other live range.
+    bool Pruned;
 
-    DupCopies.push_back(MI);
-    return true;
-  }
-}
+    // True once Pruned above has been computed.
+    bool PrunedComputed;
 
-/// joinIntervals - Attempt to join these two intervals.  On failure, this
-/// returns false.
-bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) {
-  // Handle physreg joins separately.
-  if (CP.isPhys())
-    return joinReservedPhysReg(CP);
+    Val() : Resolution(CR_Keep), WriteLanes(0), ValidLanes(0),
+            RedefVNI(0), OtherVNI(0), IsImplicitDef(false), Pruned(false),
+            PrunedComputed(false) {}
 
-  LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
-  DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS
-               << '\n');
+    bool isAnalyzed() const { return WriteLanes != 0; }
+  };
 
-  // Compute the final value assignment, assuming that the live ranges can be
-  // coalesced.
-  SmallVector<int, 16> LHSValNoAssignments;
-  SmallVector<int, 16> RHSValNoAssignments;
-  DenseMap<VNInfo*, VNInfo*> LHSValsDefinedFromRHS;
-  DenseMap<VNInfo*, VNInfo*> RHSValsDefinedFromLHS;
-  SmallVector<VNInfo*, 16> NewVNInfo;
+  // One entry per value number in LI.
+  SmallVector<Val, 8> Vals;
+
+  unsigned computeWriteLanes(const MachineInstr *DefMI, bool &Redef);
+  VNInfo *stripCopies(VNInfo *VNI);
+  ConflictResolution analyzeValue(unsigned ValNo, JoinVals &Other);
+  void computeAssignment(unsigned ValNo, JoinVals &Other);
+  bool taintExtent(unsigned, unsigned, JoinVals&,
+                   SmallVectorImpl<std::pair<SlotIndex, unsigned> >&);
+  bool usesLanes(MachineInstr *MI, unsigned, unsigned, unsigned);
+  bool isPrunedValue(unsigned ValNo, JoinVals &Other);
+
+public:
+  JoinVals(LiveInterval &li, unsigned subIdx,
+           SmallVectorImpl<VNInfo*> &newVNInfo,
+           const CoalescerPair &cp,
+           LiveIntervals *lis,
+           const TargetRegisterInfo *tri)
+    : LI(li), SubIdx(subIdx), NewVNInfo(newVNInfo), CP(cp), LIS(lis),
+      Indexes(LIS->getSlotIndexes()), TRI(tri),
+      Assignments(LI.getNumValNums(), -1), Vals(LI.getNumValNums())
+  {}
+
+  /// Analyze defs in LI and compute a value mapping in NewVNInfo.
+  /// Returns false if any conflicts were impossible to resolve.
+  bool mapValues(JoinVals &Other);
+
+  /// Try to resolve conflicts that require all values to be mapped.
+  /// Returns false if any conflicts were impossible to resolve.
+  bool resolveConflicts(JoinVals &Other);
+
+  /// Prune the live range of values in Other.LI where they would conflict with
+  /// CR_Replace values in LI. Collect end points for restoring the live range
+  /// after joining.
+  void pruneValues(JoinVals &Other, SmallVectorImpl<SlotIndex> &EndPoints);
+
+  /// Erase any machine instructions that have been coalesced away.
+  /// Add erased instructions to ErasedInstrs.
+  /// Add foreign virtual registers to ShrinkRegs if their live range ended at
+  /// the erased instrs.
+  void eraseInstrs(SmallPtrSet<MachineInstr*, 8> &ErasedInstrs,
+                   SmallVectorImpl<unsigned> &ShrinkRegs);
+
+  /// Get the value assignments suitable for passing to LiveInterval::join.
+  const int *getAssignments() const { return Assignments.data(); }
+};
+} // end anonymous namespace
+
+/// Compute the bitmask of lanes actually written by DefMI.
+/// Set Redef if there are any partial register definitions that depend on the
+/// previous value of the register.
+unsigned JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef) {
+  unsigned L = 0;
+  for (ConstMIOperands MO(DefMI); MO.isValid(); ++MO) {
+    if (!MO->isReg() || MO->getReg() != LI.reg || !MO->isDef())
+      continue;
+    L |= TRI->getSubRegIndexLaneMask(
+           TRI->composeSubRegIndices(SubIdx, MO->getSubReg()));
+    if (MO->readsReg())
+      Redef = true;
+  }
+  return L;
+}
 
-  SmallVector<MachineInstr*, 8> DupCopies;
-  SmallVector<MachineInstr*, 8> DeadCopies;
+/// Find the ultimate value that VNI was copied from.
+VNInfo *JoinVals::stripCopies(VNInfo *VNI) {
+  while (!VNI->isPHIDef()) {
+    MachineInstr *MI = Indexes->getInstructionFromIndex(VNI->def);
+    assert(MI && "No defining instruction");
+    if (!MI->isFullCopy())
+      break;
+    unsigned Reg = MI->getOperand(1).getReg();
+    if (!TargetRegisterInfo::isVirtualRegister(Reg))
+      break;
+    LiveRangeQuery LRQ(LIS->getInterval(Reg), VNI->def);
+    if (!LRQ.valueIn())
+      break;
+    VNI = LRQ.valueIn();
+  }
+  return VNI;
+}
 
-  LiveInterval &LHS = LIS->getOrCreateInterval(CP.getDstReg());
-  DEBUG(dbgs() << "\t\tLHS = " << PrintReg(CP.getDstReg(), TRI) << ' ' << LHS
-               << '\n');
+/// Analyze ValNo in this live range, and set all fields of Vals[ValNo].
+/// Return a conflict resolution when possible, but leave the hard cases as
+/// CR_Unresolved.
+/// Recursively calls computeAssignment() on this and Other, guaranteeing that
+/// both OtherVNI and RedefVNI have been analyzed and mapped before returning.
+/// The recursion always goes upwards in the dominator tree, making loops
+/// impossible.
+JoinVals::ConflictResolution
+JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
+  Val &V = Vals[ValNo];
+  assert(!V.isAnalyzed() && "Value has already been analyzed!");
+  VNInfo *VNI = LI.getValNumInfo(ValNo);
+  if (VNI->isUnused()) {
+    V.WriteLanes = ~0u;
+    return CR_Keep;
+  }
 
-  // Loop over the value numbers of the LHS, seeing if any are defined from
-  // the RHS.
-  for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
-       i != e; ++i) {
-    VNInfo *VNI = *i;
-    if (VNI->isUnused() || VNI->isPHIDef())
-      continue;
-    MachineInstr *MI = LIS->getInstructionFromIndex(VNI->def);
-    assert(MI && "Missing def");
-    if (!MI->isCopyLike() && !MI->isImplicitDef()) // Src not defined by a copy?
-      continue;
+  // Get the instruction defining this value, compute the lanes written.
+  const MachineInstr *DefMI = 0;
+  if (VNI->isPHIDef()) {
+    // Conservatively assume that all lanes in a PHI are valid.
+    V.ValidLanes = V.WriteLanes = TRI->getSubRegIndexLaneMask(SubIdx);
+  } else {
+    DefMI = Indexes->getInstructionFromIndex(VNI->def);
+    bool Redef = false;
+    V.ValidLanes = V.WriteLanes = computeWriteLanes(DefMI, Redef);
+
+    // If this is a read-modify-write instruction, there may be more valid
+    // lanes than the ones written by this instruction.
+    // This only covers partial redef operands. DefMI may have normal use
+    // operands reading the register. They don't contribute valid lanes.
+    //
+    // This adds ssub1 to the set of valid lanes in %src:
+    //
+    //   %src:ssub1<def> = FOO
+    //
+    // This leaves only ssub1 valid, making any other lanes undef:
+    //
+    //   %src:ssub1<def,read-undef> = FOO %src:ssub2
+    //
+    // The <read-undef> flag on the def operand means that old lane values are
+    // not important.
+    if (Redef) {
+      V.RedefVNI = LiveRangeQuery(LI, VNI->def).valueIn();
+      assert(V.RedefVNI && "Instruction is reading nonexistent value");
+      computeAssignment(V.RedefVNI->id, Other);
+      V.ValidLanes |= Vals[V.RedefVNI->id].ValidLanes;
+    }
 
-    // Figure out the value # from the RHS.
-    VNInfo *OtherVNI = RHS.getVNInfoBefore(VNI->def);
-    // The copy could be to an aliased physreg.
-    if (!OtherVNI)
-      continue;
+    // An IMPLICIT_DEF writes undef values.
+    if (DefMI->isImplicitDef()) {
+      V.IsImplicitDef = true;
+      V.ValidLanes &= ~V.WriteLanes;
+    }
+  }
 
-    // DstReg is known to be a register in the LHS interval.  If the src is
-    // from the RHS interval, we can use its value #.
-    if (CP.isCoalescable(MI))
-      DeadCopies.push_back(MI);
-    else if (!RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, OtherVNI,
-                                            DupCopies))
-      continue;
+  // Find the value in Other that overlaps VNI->def, if any.
+  LiveRangeQuery OtherLRQ(Other.LI, VNI->def);
+
+  // It is possible that both values are defined by the same instruction, or
+  // the values are PHIs defined in the same block. When that happens, the two
+  // values should be merged into one, but not into any preceding value.
+  // The first value defined or visited gets CR_Keep, the other gets CR_Merge.
+  if (VNInfo *OtherVNI = OtherLRQ.valueDefined()) {
+    assert(SlotIndex::isSameInstr(VNI->def, OtherVNI->def) && "Broken LRQ");
+
+    // One value stays, the other is merged. Keep the earlier one, or the first
+    // one we see.
+    if (OtherVNI->def < VNI->def)
+      Other.computeAssignment(OtherVNI->id, *this);
+    else if (VNI->def < OtherVNI->def && OtherLRQ.valueIn()) {
+      // This is an early-clobber def overlapping a live-in value in the other
+      // register. Not mergeable.
+      V.OtherVNI = OtherLRQ.valueIn();
+      return CR_Impossible;
+    }
+    V.OtherVNI = OtherVNI;
+    Val &OtherV = Other.Vals[OtherVNI->id];
+    // Keep this value, check for conflicts when analyzing OtherVNI.
+    if (!OtherV.isAnalyzed())
+      return CR_Keep;
+    // Both sides have been analyzed now.
+    // Allow overlapping PHI values. Any real interference would show up in a
+    // predecessor, the PHI itself can't introduce any conflicts.
+    if (VNI->isPHIDef())
+      return CR_Merge;
+    if (V.ValidLanes & OtherV.ValidLanes)
+      // Overlapping lanes can't be resolved.
+      return CR_Impossible;
+    else
+      return CR_Merge;
+  }
 
-    LHSValsDefinedFromRHS[VNI] = OtherVNI;
+  // No simultaneous def. Is Other live at the def?
+  V.OtherVNI = OtherLRQ.valueIn();
+  if (!V.OtherVNI)
+    // No overlap, no conflict.
+    return CR_Keep;
+
+  assert(!SlotIndex::isSameInstr(VNI->def, V.OtherVNI->def) && "Broken LRQ");
+
+  // We have overlapping values, or possibly a kill of Other.
+  // Recursively compute assignments up the dominator tree.
+  Other.computeAssignment(V.OtherVNI->id, *this);
+  const Val &OtherV = Other.Vals[V.OtherVNI->id];
+
+  // Allow overlapping PHI values. Any real interference would show up in a
+  // predecessor, the PHI itself can't introduce any conflicts.
+  if (VNI->isPHIDef())
+    return CR_Replace;
+
+  // Check for simple erasable conflicts.
+  if (DefMI->isImplicitDef())
+    return CR_Erase;
+
+  // Include the non-conflict where DefMI is a coalescable copy that kills
+  // OtherVNI. We still want the copy erased and value numbers merged.
+  if (CP.isCoalescable(DefMI)) {
+    // Some of the lanes copied from OtherVNI may be undef, making them undef
+    // here too.
+    V.ValidLanes &= ~V.WriteLanes | OtherV.ValidLanes;
+    return CR_Erase;
   }
 
-  // Loop over the value numbers of the RHS, seeing if any are defined from
-  // the LHS.
-  for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
-       i != e; ++i) {
-    VNInfo *VNI = *i;
-    if (VNI->isUnused() || VNI->isPHIDef())
-      continue;
-    MachineInstr *MI = LIS->getInstructionFromIndex(VNI->def);
-    assert(MI && "Missing def");
-    if (!MI->isCopyLike() && !MI->isImplicitDef()) // Src not defined by a copy?
-      continue;
+  // This may not be a real conflict if DefMI simply kills Other and defines
+  // VNI.
+  if (OtherLRQ.isKill() && OtherLRQ.endPoint() <= VNI->def)
+    return CR_Keep;
+
+  // Handle the case where VNI and OtherVNI can be proven to be identical:
+  //
+  //   %other = COPY %ext
+  //   %this  = COPY %ext <-- Erase this copy
+  //
+  if (DefMI->isFullCopy() && !CP.isPartial() &&
+      stripCopies(VNI) == stripCopies(V.OtherVNI))
+    return CR_Erase;
+
+  // If the lanes written by this instruction were all undef in OtherVNI, it is
+  // still safe to join the live ranges. This can't be done with a simple value
+  // mapping, though - OtherVNI will map to multiple values:
+  //
+  //   1 %dst:ssub0 = FOO                <-- OtherVNI
+  //   2 %src = BAR                      <-- VNI
+  //   3 %dst:ssub1 = COPY %src<kill>    <-- Eliminate this copy.
+  //   4 BAZ %dst<kill>
+  //   5 QUUX %src<kill>
+  //
+  // Here OtherVNI will map to itself in [1;2), but to VNI in [2;5). CR_Replace
+  // handles this complex value mapping.
+  if ((V.WriteLanes & OtherV.ValidLanes) == 0)
+    return CR_Replace;
+
+  // If the other live range is killed by DefMI and the live ranges are still
+  // overlapping, it must be because we're looking at an early clobber def:
+  //
+  //   %dst<def,early-clobber> = ASM %src<kill>
+  //
+  // In this case, it is illegal to merge the two live ranges since the early
+  // clobber def would clobber %src before it was read.
+  if (OtherLRQ.isKill()) {
+    // This case where the def doesn't overlap the kill is handled above.
+    assert(VNI->def.isEarlyClobber() &&
+           "Only early clobber defs can overlap a kill");
+    return CR_Impossible;
+  }
 
-    // Figure out the value # from the LHS.
-    VNInfo *OtherVNI = LHS.getVNInfoBefore(VNI->def);
-    // The copy could be to an aliased physreg.
-    if (!OtherVNI)
-      continue;
+  // VNI is clobbering live lanes in OtherVNI, but there is still the
+  // possibility that no instructions actually read the clobbered lanes.
+  // If we're clobbering all the lanes in OtherVNI, at least one must be read.
+  // Otherwise Other.LI wouldn't be live here.
+  if ((TRI->getSubRegIndexLaneMask(Other.SubIdx) & ~V.WriteLanes) == 0)
+    return CR_Impossible;
+
+  // We need to verify that no instructions are reading the clobbered lanes. To
+  // save compile time, we'll only check that locally. Don't allow the tainted
+  // value to escape the basic block.
+  MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
+  if (OtherLRQ.endPoint() >= Indexes->getMBBEndIdx(MBB))
+    return CR_Impossible;
+
+  // There are still some things that could go wrong besides clobbered lanes
+  // being read, for example OtherVNI may be only partially redefined in MBB,
+  // and some clobbered lanes could escape the block. Save this analysis for
+  // resolveConflicts() when all values have been mapped. We need to know
+  // RedefVNI and WriteLanes for any later defs in MBB, and we can't compute
+  // that now - the recursive analyzeValue() calls must go upwards in the
+  // dominator tree.
+  return CR_Unresolved;
+}
 
-    // DstReg is known to be a register in the RHS interval.  If the src is
-    // from the LHS interval, we can use its value #.
-    if (CP.isCoalescable(MI))
-      DeadCopies.push_back(MI);
-    else if (!RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, OtherVNI,
-                                            DupCopies))
-        continue;
+/// Compute the value assignment for ValNo in LI.
+/// This may be called recursively by analyzeValue(), but never for a ValNo on
+/// the stack.
+void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) {
+  Val &V = Vals[ValNo];
+  if (V.isAnalyzed()) {
+    // Recursion should always move up the dominator tree, so ValNo is not
+    // supposed to reappear before it has been assigned.
+    assert(Assignments[ValNo] != -1 && "Bad recursion?");
+    return;
+  }
+  switch ((V.Resolution = analyzeValue(ValNo, Other))) {
+  case CR_Erase:
+  case CR_Merge:
+    // Merge this ValNo into OtherVNI.
+    assert(V.OtherVNI && "OtherVNI not assigned, can't merge.");
+    assert(Other.Vals[V.OtherVNI->id].isAnalyzed() && "Missing recursion");
+    Assignments[ValNo] = Other.Assignments[V.OtherVNI->id];
+    DEBUG(dbgs() << "\t\tmerge " << PrintReg(LI.reg) << ':' << ValNo << '@'
+                 << LI.getValNumInfo(ValNo)->def << " into "
+                 << PrintReg(Other.LI.reg) << ':' << V.OtherVNI->id << '@'
+                 << V.OtherVNI->def << " --> @"
+                 << NewVNInfo[Assignments[ValNo]]->def << '\n');
+    break;
+  case CR_Replace:
+  case CR_Unresolved:
+    // The other value is going to be pruned if this join is successful.
+    assert(V.OtherVNI && "OtherVNI not assigned, can't prune");
+    Other.Vals[V.OtherVNI->id].Pruned = true;
+    // Fall through.
+  default:
+    // This value number needs to go in the final joined live range.
+    Assignments[ValNo] = NewVNInfo.size();
+    NewVNInfo.push_back(LI.getValNumInfo(ValNo));
+    break;
+  }
+}
 
-    RHSValsDefinedFromLHS[VNI] = OtherVNI;
+bool JoinVals::mapValues(JoinVals &Other) {
+  for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) {
+    computeAssignment(i, Other);
+    if (Vals[i].Resolution == CR_Impossible) {
+      DEBUG(dbgs() << "\t\tinterference at " << PrintReg(LI.reg) << ':' << i
+                   << '@' << LI.getValNumInfo(i)->def << '\n');
+      return false;
+    }
   }
+  return true;
+}
 
-  LHSValNoAssignments.resize(LHS.getNumValNums(), -1);
-  RHSValNoAssignments.resize(RHS.getNumValNums(), -1);
-  NewVNInfo.reserve(LHS.getNumValNums() + RHS.getNumValNums());
+/// Assuming ValNo is going to clobber some valid lanes in Other.LI, compute
+/// the extent of the tainted lanes in the block.
+///
+/// Multiple values in Other.LI can be affected since partial redefinitions can
+/// preserve previously tainted lanes.
+///
+///   1 %dst = VLOAD           <-- Define all lanes in %dst
+///   2 %src = FOO             <-- ValNo to be joined with %dst:ssub0
+///   3 %dst:ssub1 = BAR       <-- Partial redef doesn't clear taint in ssub0
+///   4 %dst:ssub0 = COPY %src <-- Conflict resolved, ssub0 wasn't read
+///
+/// For each ValNo in Other that is affected, add an (EndIndex, TaintedLanes)
+/// entry to TaintedVals.
+///
+/// Returns false if the tainted lanes extend beyond the basic block.
+bool JoinVals::
+taintExtent(unsigned ValNo, unsigned TaintedLanes, JoinVals &Other,
+            SmallVectorImpl<std::pair<SlotIndex, unsigned> > &TaintExtent) {
+  VNInfo *VNI = LI.getValNumInfo(ValNo);
+  MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
+  SlotIndex MBBEnd = Indexes->getMBBEndIdx(MBB);
+
+  // Scan Other.LI from VNI.def to MBBEnd.
+  LiveInterval::iterator OtherI = Other.LI.find(VNI->def);
+  assert(OtherI != Other.LI.end() && "No conflict?");
+  do {
+    // OtherI is pointing to a tainted value. Abort the join if the tainted
+    // lanes escape the block.
+    SlotIndex End = OtherI->end;
+    if (End >= MBBEnd) {
+      DEBUG(dbgs() << "\t\ttaints global " << PrintReg(Other.LI.reg) << ':'
+                   << OtherI->valno->id << '@' << OtherI->start << '\n');
+      return false;
+    }
+    DEBUG(dbgs() << "\t\ttaints local " << PrintReg(Other.LI.reg) << ':'
+                 << OtherI->valno->id << '@' << OtherI->start
+                 << " to " << End << '\n');
+    // A dead def is not a problem.
+    if (End.isDead())
+      break;
+    TaintExtent.push_back(std::make_pair(End, TaintedLanes));
+
+    // Check for another def in the MBB.
+    if (++OtherI == Other.LI.end() || OtherI->start >= MBBEnd)
+      break;
+
+    // Lanes written by the new def are no longer tainted.
+    const Val &OV = Other.Vals[OtherI->valno->id];
+    TaintedLanes &= ~OV.WriteLanes;
+    if (!OV.RedefVNI)
+      break;
+  } while (TaintedLanes);
+  return true;
+}
 
-  for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
-       i != e; ++i) {
-    VNInfo *VNI = *i;
-    unsigned VN = VNI->id;
-    if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused())
+/// Return true if MI uses any of the given Lanes from Reg.
+/// This does not include partial redefinitions of Reg.
+bool JoinVals::usesLanes(MachineInstr *MI, unsigned Reg, unsigned SubIdx,
+                         unsigned Lanes) {
+  if (MI->isDebugValue())
+    return false;
+  for (ConstMIOperands MO(MI); MO.isValid(); ++MO) {
+    if (!MO->isReg() || MO->isDef() || MO->getReg() != Reg)
       continue;
-    ComputeUltimateVN(VNI, NewVNInfo,
-                      LHSValsDefinedFromRHS, RHSValsDefinedFromLHS,
-                      LHSValNoAssignments, RHSValNoAssignments);
-  }
-  for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
-       i != e; ++i) {
-    VNInfo *VNI = *i;
-    unsigned VN = VNI->id;
-    if (RHSValNoAssignments[VN] >= 0 || VNI->isUnused())
+    if (!MO->readsReg())
       continue;
-    // If this value number isn't a copy from the LHS, it's a new number.
-    if (RHSValsDefinedFromLHS.find(VNI) == RHSValsDefinedFromLHS.end()) {
-      NewVNInfo.push_back(VNI);
-      RHSValNoAssignments[VN] = NewVNInfo.size()-1;
+    if (Lanes & TRI->getSubRegIndexLaneMask(
+                  TRI->composeSubRegIndices(SubIdx, MO->getSubReg())))
+      return true;
+  }
+  return false;
+}
+
+bool JoinVals::resolveConflicts(JoinVals &Other) {
+  for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) {
+    Val &V = Vals[i];
+    assert (V.Resolution != CR_Impossible && "Unresolvable conflict");
+    if (V.Resolution != CR_Unresolved)
       continue;
-    }
+    DEBUG(dbgs() << "\t\tconflict at " << PrintReg(LI.reg) << ':' << i
+                 << '@' << LI.getValNumInfo(i)->def << '\n');
+    ++NumLaneConflicts;
+    assert(V.OtherVNI && "Inconsistent conflict resolution.");
+    VNInfo *VNI = LI.getValNumInfo(i);
+    const Val &OtherV = Other.Vals[V.OtherVNI->id];
+
+    // VNI is known to clobber some lanes in OtherVNI. If we go ahead with the
+    // join, those lanes will be tainted with a wrong value. Get the extent of
+    // the tainted lanes.
+    unsigned TaintedLanes = V.WriteLanes & OtherV.ValidLanes;
+    SmallVector<std::pair<SlotIndex, unsigned>, 8> TaintExtent;
+    if (!taintExtent(i, TaintedLanes, Other, TaintExtent))
+      // Tainted lanes would extend beyond the basic block.
+      return false;
 
-    ComputeUltimateVN(VNI, NewVNInfo,
-                      RHSValsDefinedFromLHS, LHSValsDefinedFromRHS,
-                      RHSValNoAssignments, LHSValNoAssignments);
-  }
+    assert(!TaintExtent.empty() && "There should be at least one conflict.");
 
-  // Armed with the mappings of LHS/RHS values to ultimate values, walk the
-  // interval lists to see if these intervals are coalescable.
-  LiveInterval::const_iterator I = LHS.begin();
-  LiveInterval::const_iterator IE = LHS.end();
-  LiveInterval::const_iterator J = RHS.begin();
-  LiveInterval::const_iterator JE = RHS.end();
-
-  // Collect interval end points that will no longer be kills.
-  SmallVector<MachineInstr*, 8> LHSOldKills;
-  SmallVector<MachineInstr*, 8> RHSOldKills;
-
-  // Skip ahead until the first place of potential sharing.
-  if (I != IE && J != JE) {
-    if (I->start < J->start) {
-      I = std::upper_bound(I, IE, J->start);
-      if (I != LHS.begin()) --I;
-    } else if (J->start < I->start) {
-      J = std::upper_bound(J, JE, I->start);
-      if (J != RHS.begin()) --J;
+    // Now look at the instructions from VNI->def to TaintExtent (inclusive).
+    MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
+    MachineBasicBlock::iterator MI = MBB->begin();
+    if (!VNI->isPHIDef()) {
+      MI = Indexes->getInstructionFromIndex(VNI->def);
+      // No need to check the instruction defining VNI for reads.
+      ++MI;
     }
-  }
-
-  while (I != IE && J != JE) {
-    // Determine if these two live ranges overlap.
-    // If so, check value # info to determine if they are really different.
-    if (I->end > J->start && J->end > I->start) {
-      // If the live range overlap will map to the same value number in the
-      // result liverange, we can still coalesce them.  If not, we can't.
-      if (LHSValNoAssignments[I->valno->id] !=
-          RHSValNoAssignments[J->valno->id])
+    assert(!SlotIndex::isSameInstr(VNI->def, TaintExtent.front().first) &&
+           "Interference ends on VNI->def. Should have been handled earlier");
+    MachineInstr *LastMI =
+      Indexes->getInstructionFromIndex(TaintExtent.front().first);
+    assert(LastMI && "Range must end at a proper instruction");
+    unsigned TaintNum = 0;
+    for(;;) {
+      assert(MI != MBB->end() && "Bad LastMI");
+      if (usesLanes(MI, Other.LI.reg, Other.SubIdx, TaintedLanes)) {
+        DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI);
         return false;
-
-      // Extended live ranges should no longer be killed.
-      if (!I->end.isBlock() && I->end < J->end)
-        if (MachineInstr *MI = LIS->getInstructionFromIndex(I->end))
-          LHSOldKills.push_back(MI);
-      if (!J->end.isBlock() && J->end < I->end)
-        if (MachineInstr *MI = LIS->getInstructionFromIndex(J->end))
-          RHSOldKills.push_back(MI);
+      }
+      // LastMI is the last instruction to use the current value.
+      if (&*MI == LastMI) {
+        if (++TaintNum == TaintExtent.size())
+          break;
+        LastMI = Indexes->getInstructionFromIndex(TaintExtent[TaintNum].first);
+        assert(LastMI && "Range must end at a proper instruction");
+        TaintedLanes = TaintExtent[TaintNum].second;
+      }
+      ++MI;
     }
 
-    if (I->end < J->end)
-      ++I;
-    else
-      ++J;
-  }
-
-  // Clear kill flags where live ranges are extended.
-  while (!LHSOldKills.empty())
-    LHSOldKills.pop_back_val()->clearRegisterKills(LHS.reg, TRI);
-  while (!RHSOldKills.empty())
-    RHSOldKills.pop_back_val()->clearRegisterKills(RHS.reg, TRI);
-
-  if (LHSValNoAssignments.empty())
-    LHSValNoAssignments.push_back(-1);
-  if (RHSValNoAssignments.empty())
-    RHSValNoAssignments.push_back(-1);
-
-  // Now erase all the redundant copies.
-  for (unsigned i = 0, e = DeadCopies.size(); i != e; ++i) {
-    MachineInstr *MI = DeadCopies[i];
-    if (!ErasedInstrs.insert(MI))
-      continue;
-    DEBUG(dbgs() << "\t\terased:\t" << LIS->getInstructionIndex(MI)
-                 << '\t' << *MI);
-    LIS->RemoveMachineInstrFromMaps(MI);
-    MI->eraseFromParent();
+    // The tainted lanes are unused.
+    V.Resolution = CR_Replace;
+    ++NumLaneResolves;
   }
+  return true;
+}
 
-  SmallVector<unsigned, 8> SourceRegisters;
-  for (SmallVector<MachineInstr*, 8>::iterator I = DupCopies.begin(),
-         E = DupCopies.end(); I != E; ++I) {
-    MachineInstr *MI = *I;
-    if (!ErasedInstrs.insert(MI))
-      continue;
+// Determine if ValNo is a copy of a value number in LI or Other.LI that will
+// be pruned:
+//
+//   %dst = COPY %src
+//   %src = COPY %dst  <-- This value to be pruned.
+//   %dst = COPY %src  <-- This value is a copy of a pruned value.
+//
+bool JoinVals::isPrunedValue(unsigned ValNo, JoinVals &Other) {
+  Val &V = Vals[ValNo];
+  if (V.Pruned || V.PrunedComputed)
+    return V.Pruned;
+
+  if (V.Resolution != CR_Erase && V.Resolution != CR_Merge)
+    return V.Pruned;
+
+  // Follow copies up the dominator tree and check if any intermediate value
+  // has been pruned.
+  V.PrunedComputed = true;
+  V.Pruned = Other.isPrunedValue(V.OtherVNI->id, *this);
+  return V.Pruned;
+}
 
-    // If MI is a copy, then we have pretended that the assignment to B in
-    // A = X
-    // B = X
-    // was actually a copy from A. Now that we decided to coalesce A and B,
-    // transform the code into
-    // A = X
-    // In the case of the implicit_def, we just have to remove it.
-    if (!MI->isImplicitDef()) {
-      unsigned Src = MI->getOperand(1).getReg();
-      SourceRegisters.push_back(Src);
+void JoinVals::pruneValues(JoinVals &Other,
+                           SmallVectorImpl<SlotIndex> &EndPoints) {
+  for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) {
+    SlotIndex Def = LI.getValNumInfo(i)->def;
+    switch (Vals[i].Resolution) {
+    case CR_Keep:
+      break;
+    case CR_Replace: {
+      // This value takes precedence over the value in Other.LI.
+      LIS->pruneValue(&Other.LI, Def, &EndPoints);
+      // Check if we're replacing an IMPLICIT_DEF value. The IMPLICIT_DEF
+      // instructions are only inserted to provide a live-out value for PHI
+      // predecessors, so the instruction should simply go away once its value
+      // has been replaced.
+      Val &OtherV = Other.Vals[Vals[i].OtherVNI->id];
+      bool EraseImpDef = OtherV.IsImplicitDef && OtherV.Resolution == CR_Keep;
+      if (!Def.isBlock()) {
+        // Remove <def,read-undef> flags. This def is now a partial redef.
+        // Also remove <def,dead> flags since the joined live range will
+        // continue past this instruction.
+        for (MIOperands MO(Indexes->getInstructionFromIndex(Def));
+             MO.isValid(); ++MO)
+          if (MO->isReg() && MO->isDef() && MO->getReg() == LI.reg) {
+            MO->setIsUndef(EraseImpDef);
+            MO->setIsDead(false);
+          }
+        // This value will reach instructions below, but we need to make sure
+        // the live range also reaches the instruction at Def.
+        if (!EraseImpDef)
+          EndPoints.push_back(Def);
+      }
+      DEBUG(dbgs() << "\t\tpruned " << PrintReg(Other.LI.reg) << " at " << Def
+                   << ": " << Other.LI << '\n');
+      break;
+    }
+    case CR_Erase:
+    case CR_Merge:
+      if (isPrunedValue(i, Other)) {
+        // This value is ultimately a copy of a pruned value in LI or Other.LI.
+        // We can no longer trust the value mapping computed by
+        // computeAssignment(), the value that was originally copied could have
+        // been replaced.
+        LIS->pruneValue(&LI, Def, &EndPoints);
+        DEBUG(dbgs() << "\t\tpruned all of " << PrintReg(LI.reg) << " at "
+                     << Def << ": " << LI << '\n');
+      }
+      break;
+    case CR_Unresolved:
+    case CR_Impossible:
+      llvm_unreachable("Unresolved conflicts");
     }
-    LIS->RemoveMachineInstrFromMaps(MI);
-    MI->eraseFromParent();
   }
+}
 
-  // If B = X was the last use of X in a liverange, we have to shrink it now
-  // that B = X is gone.
-  for (SmallVector<unsigned, 8>::iterator I = SourceRegisters.begin(),
-         E = SourceRegisters.end(); I != E; ++I) {
-    LIS->shrinkToUses(&LIS->getInterval(*I));
+void JoinVals::eraseInstrs(SmallPtrSet<MachineInstr*, 8> &ErasedInstrs,
+                           SmallVectorImpl<unsigned> &ShrinkRegs) {
+  for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) {
+    // Get the def location before markUnused() below invalidates it.
+    SlotIndex Def = LI.getValNumInfo(i)->def;
+    switch (Vals[i].Resolution) {
+    case CR_Keep:
+      // If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any
+      // longer. The IMPLICIT_DEF instructions are only inserted by
+      // PHIElimination to guarantee that all PHI predecessors have a value.
+      if (!Vals[i].IsImplicitDef || !Vals[i].Pruned)
+        break;
+      // Remove value number i from LI. Note that this VNInfo is still present
+      // in NewVNInfo, so it will appear as an unused value number in the final
+      // joined interval.
+      LI.getValNumInfo(i)->markUnused();
+      LI.removeValNo(LI.getValNumInfo(i));
+      DEBUG(dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LI << '\n');
+      // FALL THROUGH.
+
+    case CR_Erase: {
+      MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
+      assert(MI && "No instruction to erase");
+      if (MI->isCopy()) {
+        unsigned Reg = MI->getOperand(1).getReg();
+        if (TargetRegisterInfo::isVirtualRegister(Reg) &&
+            Reg != CP.getSrcReg() && Reg != CP.getDstReg())
+          ShrinkRegs.push_back(Reg);
+      }
+      ErasedInstrs.insert(MI);
+      DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI);
+      LIS->RemoveMachineInstrFromMaps(MI);
+      MI->eraseFromParent();
+      break;
+    }
+    default:
+      break;
+    }
   }
+}
+
+bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
+  SmallVector<VNInfo*, 16> NewVNInfo;
+  LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
+  LiveInterval &LHS = LIS->getInterval(CP.getDstReg());
+  JoinVals RHSVals(RHS, CP.getSrcIdx(), NewVNInfo, CP, LIS, TRI);
+  JoinVals LHSVals(LHS, CP.getDstIdx(), NewVNInfo, CP, LIS, TRI);
+
+  DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS
+               << "\n\t\tLHS = " << PrintReg(CP.getDstReg()) << ' ' << LHS
+               << '\n');
+
+  // First compute NewVNInfo and the simple value mappings.
+  // Detect impossible conflicts early.
+  if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals))
+    return false;
+
+  // Some conflicts can only be resolved after all values have been mapped.
+  if (!LHSVals.resolveConflicts(RHSVals) || !RHSVals.resolveConflicts(LHSVals))
+    return false;
 
-  // If we get here, we know that we can coalesce the live ranges.  Ask the
-  // intervals to coalesce themselves now.
-  LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo,
+  // All clear, the live ranges can be merged.
+
+  // The merging algorithm in LiveInterval::join() can't handle conflicting
+  // value mappings, so we need to remove any live ranges that overlap a
+  // CR_Replace resolution. Collect a set of end points that can be used to
+  // restore the live range after joining.
+  SmallVector<SlotIndex, 8> EndPoints;
+  LHSVals.pruneValues(RHSVals, EndPoints);
+  RHSVals.pruneValues(LHSVals, EndPoints);
+
+  // Erase COPY and IMPLICIT_DEF instructions. This may cause some external
+  // registers to require trimming.
+  SmallVector<unsigned, 8> ShrinkRegs;
+  LHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs);
+  RHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs);
+  while (!ShrinkRegs.empty())
+    LIS->shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val()));
+
+  // Join RHS into LHS.
+  LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo,
            MRI);
+
+  // Kill flags are going to be wrong if the live ranges were overlapping.
+  // Eventually, we should simply clear all kill flags when computing live
+  // ranges. They are reinserted after register allocation.
+  MRI->clearKillFlags(LHS.reg);
+  MRI->clearKillFlags(RHS.reg);
+
+  if (EndPoints.empty())
+    return true;
+
+  // Recompute the parts of the live range we had to remove because of
+  // CR_Replace conflicts.
+  DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size()
+               << " points: " << LHS << '\n');
+  LIS->extendToIndices(&LHS, EndPoints);
   return true;
 }
 
+/// joinIntervals - Attempt to join these two intervals.  On failure, this
+/// returns false.
+bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) {
+  return CP.isPhys() ? joinReservedPhysReg(CP) : joinVirtRegs(CP);
+}
+
 namespace {
   // DepthMBBCompare - Comparison predicate that sort first based on the loop
   // depth of the basic block (the unsigned), and then on the MBB number.
@@ -1564,8 +2024,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
   Loops = &getAnalysis<MachineLoopInfo>();
 
   DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n"
-               << "********** Function: "
-               << ((Value*)MF->getFunction())->getName() << '\n');
+               << "********** Function: " << MF->getName() << '\n');
 
   if (VerifyCoalescing)
     MF->verify(this, "Before register coalescing");
diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.h b/contrib/llvm/lib/CodeGen/RegisterCoalescer.h
index 8a6df98..47c3df1 100644
--- a/contrib/llvm/lib/CodeGen/RegisterCoalescer.h
+++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.h
@@ -63,6 +63,13 @@ namespace llvm {
       : TRI(tri), DstReg(0), SrcReg(0), DstIdx(0), SrcIdx(0),
         Partial(false), CrossClass(false), Flipped(false), NewRC(0) {}
 
+    /// Create a CoalescerPair representing a virtreg-to-physreg copy.
+    /// No need to call setRegisters().
+    CoalescerPair(unsigned VirtReg, unsigned PhysReg,
+                  const TargetRegisterInfo &tri)
+      : TRI(tri), DstReg(PhysReg), SrcReg(VirtReg), DstIdx(0), SrcIdx(0),
+        Partial(false), CrossClass(false), Flipped(false), NewRC(0) {}
+
     /// setRegisters - set registers to match the copy instruction MI. Return
     /// false if MI is not a coalescable copy instruction.
     bool setRegisters(const MachineInstr*);
diff --git a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
index 43448c8..543c426 100644
--- a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
@@ -63,7 +63,8 @@ void RegisterPressure::decrease(const TargetRegisterClass *RC,
   decreaseSetPressure(MaxSetPressure, RC, TRI);
 }
 
-void RegisterPressure::dump(const TargetRegisterInfo *TRI) {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void RegisterPressure::dump(const TargetRegisterInfo *TRI) const {
   dbgs() << "Live In: ";
   for (unsigned i = 0, e = LiveInRegs.size(); i < e; ++i)
     dbgs() << PrintReg(LiveInRegs[i], TRI) << " ";
@@ -78,6 +79,7 @@ void RegisterPressure::dump(const TargetRegisterInfo *TRI) {
              << '\n';
   }
 }
+#endif
 
 /// Increase the current pressure as impacted by these physical registers and
 /// bump the high water mark if needed.
@@ -320,10 +322,8 @@ struct RegisterOperands {
         if (findReg(MO.getReg(), isVReg, DeadDefs, TRI) == DeadDefs.end())
           DeadDefs.push_back(MO.getReg());
       }
-      else {
-        if (findReg(MO.getReg(), isVReg, Defs, TRI) == Defs.end())
-          Defs.push_back(MO.getReg());
-      }
+      else if (findReg(MO.getReg(), isVReg, Defs, TRI) == Defs.end())
+        Defs.push_back(MO.getReg());
     }
   }
 };
@@ -335,7 +335,7 @@ static void collectOperands(const MachineInstr *MI,
                             PhysRegOperands &PhysRegOpers,
                             VirtRegOperands &VirtRegOpers,
                             const TargetRegisterInfo *TRI,
-                            const RegisterClassInfo *RCI) {
+                            const MachineRegisterInfo *MRI) {
   for(ConstMIBundleOperands OperI(MI); OperI.isValid(); ++OperI) {
     const MachineOperand &MO = *OperI;
     if (!MO.isReg() || !MO.getReg())
@@ -343,7 +343,7 @@ static void collectOperands(const MachineInstr *MI,
 
     if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
       VirtRegOpers.collect(MO, TRI);
-    else if (RCI->isAllocatable(MO.getReg()))
+    else if (MRI->isAllocatable(MO.getReg()))
       PhysRegOpers.collect(MO, TRI);
   }
   // Remove redundant physreg dead defs.
@@ -449,7 +449,7 @@ bool RegPressureTracker::recede() {
 
   PhysRegOperands PhysRegOpers;
   VirtRegOperands VirtRegOpers;
-  collectOperands(CurrPos, PhysRegOpers, VirtRegOpers, TRI, RCI);
+  collectOperands(CurrPos, PhysRegOpers, VirtRegOpers, TRI, MRI);
 
   // Boost pressure for all dead defs together.
   increasePhysRegPressure(PhysRegOpers.DeadDefs);
@@ -522,7 +522,7 @@ bool RegPressureTracker::advance() {
 
   PhysRegOperands PhysRegOpers;
   VirtRegOperands VirtRegOpers;
-  collectOperands(CurrPos, PhysRegOpers, VirtRegOpers, TRI, RCI);
+  collectOperands(CurrPos, PhysRegOpers, VirtRegOpers, TRI, MRI);
 
   // Kill liveness at last uses.
   for (unsigned i = 0, e = PhysRegOpers.Uses.size(); i < e; ++i) {
@@ -664,7 +664,7 @@ void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) {
   // Account for register pressure similar to RegPressureTracker::recede().
   PhysRegOperands PhysRegOpers;
   VirtRegOperands VirtRegOpers;
-  collectOperands(MI, PhysRegOpers, VirtRegOpers, TRI, RCI);
+  collectOperands(MI, PhysRegOpers, VirtRegOpers, TRI, MRI);
 
   // Boost max pressure for all dead defs together.
   // Since CurrSetPressure and MaxSetPressure
@@ -674,9 +674,16 @@ void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) {
   decreaseVirtRegPressure(VirtRegOpers.DeadDefs);
 
   // Kill liveness at live defs.
-  decreasePhysRegPressure(PhysRegOpers.Defs);
-  decreaseVirtRegPressure(VirtRegOpers.Defs);
-
+  for (unsigned i = 0, e = PhysRegOpers.Defs.size(); i < e; ++i) {
+    unsigned Reg = PhysRegOpers.Defs[i];
+    if (!findReg(Reg, false, PhysRegOpers.Uses, TRI))
+      decreasePhysRegPressure(PhysRegOpers.Defs);
+  }
+  for (unsigned i = 0, e = VirtRegOpers.Defs.size(); i < e; ++i) {
+    unsigned Reg = VirtRegOpers.Defs[i];
+    if (!findReg(Reg, true, VirtRegOpers.Uses, TRI))
+      decreaseVirtRegPressure(VirtRegOpers.Defs);
+  }
   // Generate liveness for uses.
   for (unsigned i = 0, e = PhysRegOpers.Uses.size(); i < e; ++i) {
     unsigned Reg = PhysRegOpers.Uses[i];
@@ -750,7 +757,7 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) {
   // Account for register pressure similar to RegPressureTracker::recede().
   PhysRegOperands PhysRegOpers;
   VirtRegOperands VirtRegOpers;
-  collectOperands(MI, PhysRegOpers, VirtRegOpers, TRI, RCI);
+  collectOperands(MI, PhysRegOpers, VirtRegOpers, TRI, MRI);
 
   // Kill liveness at last uses. Assume allocatable physregs are single-use
   // rather than checking LiveIntervals.
diff --git a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp
index d673794..5ec6564 100644
--- a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp
@@ -92,9 +92,6 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) {
     KillRegs.resize(NumPhysRegs);
     DefRegs.resize(NumPhysRegs);
 
-    // Create reserved registers bitvector.
-    ReservedRegs = TRI->getReservedRegs(MF);
-
     // Create callee-saved registers bitvector.
     CalleeSavedRegs.resize(NumPhysRegs);
     const uint16_t *CSRegs = TRI->getCalleeSavedRegs(&MF);
@@ -225,9 +222,9 @@ void RegScavenger::getRegsUsed(BitVector &used, bool includeReserved) {
   used = RegsAvailable;
   used.flip();
   if (includeReserved)
-    used |= ReservedRegs;
+    used |= MRI->getReservedRegs();
   else
-    used.reset(ReservedRegs);
+    used.reset(MRI->getReservedRegs());
 }
 
 unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const {
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
index 752f8e4..9a65071 100644
--- a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
@@ -279,6 +279,7 @@ void SUnit::ComputeHeight() {
   } while (!WorkList.empty());
 }
 
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 /// SUnit - Scheduling unit. It's an wrapper around either a single SDNode or
 /// a group of nodes flagged together.
 void SUnit::dump(const ScheduleDAG *G) const {
@@ -336,6 +337,7 @@ void SUnit::dumpAll(const ScheduleDAG *G) const {
   }
   dbgs() << "\n";
 }
+#endif
 
 #ifndef NDEBUG
 /// VerifyScheduledDAG - Verify that all SUnits were scheduled and that
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index 9c1dba3..a4d4a93 100644
--- a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -22,6 +22,7 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/CodeGen/ScheduleDAGILP.h"
 #include "llvm/CodeGen/ScheduleDAGInstrs.h"
 #include "llvm/MC/MCInstrItineraries.h"
 #include "llvm/Target/TargetMachine.h"
@@ -30,6 +31,7 @@
 #include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -44,14 +46,15 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
                                      const MachineDominatorTree &mdt,
                                      bool IsPostRAFlag,
                                      LiveIntervals *lis)
-  : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()),
-    InstrItins(mf.getTarget().getInstrItineraryData()), LIS(lis),
-    IsPostRA(IsPostRAFlag), UnitLatencies(false), CanHandleTerminators(false),
-    LoopRegs(MDT), FirstDbgValue(0) {
+  : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()), LIS(lis),
+    IsPostRA(IsPostRAFlag), CanHandleTerminators(false), FirstDbgValue(0) {
   assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
   DbgValues.clear();
   assert(!(IsPostRA && MRI.getNumVirtRegs()) &&
          "Virtual registers must be removed prior to PostRA scheduling");
+
+  const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
+  SchedModel.init(*ST.getSchedModel(), &ST, TII);
 }
 
 /// getUnderlyingObjectFromInt - This is the function that does the work of
@@ -68,7 +71,7 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) {
       // object. We don't have to worry about the case where the
       // object address is somehow being computed by the multiply,
       // because our callers only care when the result is an
-      // identifibale object.
+      // identifiable object.
       if (U->getOpcode() != Instruction::Add ||
           (!isa<ConstantInt>(U->getOperand(1)) &&
            Operator::getOpcode(U->getOperand(1)) != Instruction::Mul))
@@ -135,10 +138,6 @@ static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI,
 
 void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) {
   BB = bb;
-  LoopRegs.Deps.clear();
-  if (MachineLoop *ML = MLI.getLoopFor(BB))
-    if (BB == ML->getLoopLatch())
-      LoopRegs.VisitLoop(ML);
 }
 
 void ScheduleDAGInstrs::finishBlock() {
@@ -174,9 +173,6 @@ void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
   EndIndex = endcount;
   MISUnitMap.clear();
 
-  // Check to see if the scheduler cares about latencies.
-  UnitLatencies = forceUnitLatencies();
-
   ScheduleDAG::clearDAG();
 }
 
@@ -209,7 +205,7 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() {
       if (Reg == 0) continue;
 
       if (TRI->isPhysicalRegister(Reg))
-        Uses[Reg].push_back(&ExitSU);
+        Uses[Reg].push_back(PhysRegSUOper(&ExitSU, -1));
       else {
         assert(!IsPostRA && "Virtual register encountered after regalloc.");
         addVRegUseDeps(&ExitSU, i);
@@ -225,59 +221,44 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() {
              E = (*SI)->livein_end(); I != E; ++I) {
         unsigned Reg = *I;
         if (!Uses.contains(Reg))
-          Uses[Reg].push_back(&ExitSU);
+          Uses[Reg].push_back(PhysRegSUOper(&ExitSU, -1));
       }
   }
 }
 
 /// MO is an operand of SU's instruction that defines a physical register. Add
 /// data dependencies from SU to any uses of the physical register.
-void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU,
-                                           const MachineOperand &MO) {
+void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
+  const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx);
   assert(MO.isDef() && "expect physreg def");
 
   // Ask the target if address-backscheduling is desirable, and if so how much.
   const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
-  unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
-  unsigned DataLatency = SU->Latency;
 
   for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
        Alias.isValid(); ++Alias) {
     if (!Uses.contains(*Alias))
       continue;
-    std::vector<SUnit*> &UseList = Uses[*Alias];
+    std::vector<PhysRegSUOper> &UseList = Uses[*Alias];
     for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
-      SUnit *UseSU = UseList[i];
+      SUnit *UseSU = UseList[i].SU;
       if (UseSU == SU)
         continue;
-      unsigned LDataLatency = DataLatency;
-      // Optionally add in a special extra latency for nodes that
-      // feed addresses.
-      // TODO: Perhaps we should get rid of
-      // SpecialAddressLatency and just move this into
-      // adjustSchedDependency for the targets that care about it.
-      if (SpecialAddressLatency != 0 && !UnitLatencies &&
-          UseSU != &ExitSU) {
-        MachineInstr *UseMI = UseSU->getInstr();
-        const MCInstrDesc &UseMCID = UseMI->getDesc();
-        int RegUseIndex = UseMI->findRegisterUseOperandIdx(*Alias);
-        assert(RegUseIndex >= 0 && "UseMI doesn't use register!");
-        if (RegUseIndex >= 0 &&
-            (UseMI->mayLoad() || UseMI->mayStore()) &&
-            (unsigned)RegUseIndex < UseMCID.getNumOperands() &&
-            UseMCID.OpInfo[RegUseIndex].isLookupPtrRegClass())
-          LDataLatency += SpecialAddressLatency;
-      }
-      // Adjust the dependence latency using operand def/use
-      // information (if any), and then allow the target to
-      // perform its own adjustments.
-      SDep dep(SU, SDep::Data, LDataLatency, *Alias);
-      if (!UnitLatencies) {
-        unsigned Latency = computeOperandLatency(SU, UseSU, dep);
-        dep.setLatency(Latency);
-
-        ST.adjustSchedDependency(SU, UseSU, dep);
-      }
+
+      SDep dep(SU, SDep::Data, *Alias);
+
+      // Adjust the dependence latency using operand def/use information,
+      // then allow the target to perform its own adjustments.
+      int UseOp = UseList[i].OpIdx;
+      MachineInstr *RegUse = UseOp < 0 ? 0 : UseSU->getInstr();
+      dep.setLatency(
+        SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
+                                         RegUse, UseOp, /*FindMin=*/false));
+      dep.setMinLatency(
+        SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
+                                         RegUse, UseOp, /*FindMin=*/true));
+
+      ST.adjustSchedDependency(SU, UseSU, dep);
       UseSU->addPred(dep);
     }
   }
@@ -301,20 +282,23 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
        Alias.isValid(); ++Alias) {
     if (!Defs.contains(*Alias))
       continue;
-    std::vector<SUnit *> &DefList = Defs[*Alias];
+    std::vector<PhysRegSUOper> &DefList = Defs[*Alias];
     for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
-      SUnit *DefSU = DefList[i];
+      SUnit *DefSU = DefList[i].SU;
       if (DefSU == &ExitSU)
         continue;
       if (DefSU != SU &&
           (Kind != SDep::Output || !MO.isDead() ||
            !DefSU->getInstr()->registerDefIsDead(*Alias))) {
         if (Kind == SDep::Anti)
-          DefSU->addPred(SDep(SU, Kind, 0, /*Reg=*/*Alias));
+          DefSU->addPred(SDep(SU, Kind, /*Reg=*/*Alias));
         else {
-          unsigned AOLat = TII->getOutputLatency(InstrItins, MI, OperIdx,
-                                                 DefSU->getInstr());
-          DefSU->addPred(SDep(SU, Kind, AOLat, /*Reg=*/*Alias));
+          SDep Dep(SU, Kind, /*Reg=*/*Alias);
+          unsigned OutLatency =
+            SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr());
+          Dep.setMinLatency(OutLatency);
+          Dep.setLatency(OutLatency);
+          DefSU->addPred(Dep);
         }
       }
     }
@@ -324,61 +308,14 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
     // Either insert a new Reg2SUnits entry with an empty SUnits list, or
     // retrieve the existing SUnits list for this register's uses.
     // Push this SUnit on the use list.
-    Uses[MO.getReg()].push_back(SU);
+    Uses[MO.getReg()].push_back(PhysRegSUOper(SU, OperIdx));
   }
   else {
-    addPhysRegDataDeps(SU, MO);
+    addPhysRegDataDeps(SU, OperIdx);
 
     // Either insert a new Reg2SUnits entry with an empty SUnits list, or
     // retrieve the existing SUnits list for this register's defs.
-    std::vector<SUnit *> &DefList = Defs[MO.getReg()];
-
-    // If a def is going to wrap back around to the top of the loop,
-    // backschedule it.
-    if (!UnitLatencies && DefList.empty()) {
-      LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(MO.getReg());
-      if (I != LoopRegs.Deps.end()) {
-        const MachineOperand *UseMO = I->second.first;
-        unsigned Count = I->second.second;
-        const MachineInstr *UseMI = UseMO->getParent();
-        unsigned UseMOIdx = UseMO - &UseMI->getOperand(0);
-        const MCInstrDesc &UseMCID = UseMI->getDesc();
-        const TargetSubtargetInfo &ST =
-          TM.getSubtarget<TargetSubtargetInfo>();
-        unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
-        // TODO: If we knew the total depth of the region here, we could
-        // handle the case where the whole loop is inside the region but
-        // is large enough that the isScheduleHigh trick isn't needed.
-        if (UseMOIdx < UseMCID.getNumOperands()) {
-          // Currently, we only support scheduling regions consisting of
-          // single basic blocks. Check to see if the instruction is in
-          // the same region by checking to see if it has the same parent.
-          if (UseMI->getParent() != MI->getParent()) {
-            unsigned Latency = SU->Latency;
-            if (UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass())
-              Latency += SpecialAddressLatency;
-            // This is a wild guess as to the portion of the latency which
-            // will be overlapped by work done outside the current
-            // scheduling region.
-            Latency -= std::min(Latency, Count);
-            // Add the artificial edge.
-            ExitSU.addPred(SDep(SU, SDep::Order, Latency,
-                                /*Reg=*/0, /*isNormalMemory=*/false,
-                                /*isMustAlias=*/false,
-                                /*isArtificial=*/true));
-          } else if (SpecialAddressLatency > 0 &&
-                     UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass()) {
-            // The entire loop body is within the current scheduling region
-            // and the latency of this operation is assumed to be greater
-            // than the latency of the loop.
-            // TODO: Recursively mark data-edge predecessors as
-            //       isScheduleHigh too.
-            SU->isScheduleHigh = true;
-          }
-        }
-        LoopRegs.Deps.erase(I);
-      }
-    }
+    std::vector<PhysRegSUOper> &DefList = Defs[MO.getReg()];
 
     // clear this register's use list
     if (Uses.contains(MO.getReg()))
@@ -393,11 +330,11 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
     // the block. Instead, we leave only one call at the back of the
     // DefList.
     if (SU->isCall) {
-      while (!DefList.empty() && DefList.back()->isCall)
+      while (!DefList.empty() && DefList.back().SU->isCall)
         DefList.pop_back();
     }
     // Defs are pushed in the order they are visited and never reordered.
-    DefList.push_back(SU);
+    DefList.push_back(PhysRegSUOper(SU, OperIdx));
   }
 }
 
@@ -430,9 +367,12 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
   else {
     SUnit *DefSU = DefI->SU;
     if (DefSU != SU && DefSU != &ExitSU) {
-      unsigned OutLatency = TII->getOutputLatency(InstrItins, MI, OperIdx,
-                                                  DefSU->getInstr());
-      DefSU->addPred(SDep(SU, SDep::Output, OutLatency, Reg));
+      SDep Dep(SU, SDep::Output, Reg);
+      unsigned OutLatency =
+        SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr());
+      Dep.setMinLatency(OutLatency);
+      Dep.setLatency(OutLatency);
+      DefSU->addPred(Dep);
     }
     DefI->SU = SU;
   }
@@ -462,18 +402,17 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
     if (DefSU) {
       // The reaching Def lives within this scheduling region.
       // Create a data dependence.
-      //
-      // TODO: Handle "special" address latencies cleanly.
-      SDep dep(DefSU, SDep::Data, DefSU->Latency, Reg);
-      if (!UnitLatencies) {
-        // Adjust the dependence latency using operand def/use information, then
-        // allow the target to perform its own adjustments.
-        unsigned Latency = computeOperandLatency(DefSU, SU, const_cast<SDep &>(dep));
-        dep.setLatency(Latency);
-
-        const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
-        ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
-      }
+      SDep dep(DefSU, SDep::Data, Reg);
+      // Adjust the dependence latency using operand def/use information, then
+      // allow the target to perform its own adjustments.
+      int DefOp = Def->findRegisterDefOperandIdx(Reg);
+      dep.setLatency(
+        SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx, false));
+      dep.setMinLatency(
+        SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx, true));
+
+      const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
+      ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
       SU->addPred(dep);
     }
   }
@@ -481,14 +420,14 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
   // Add antidependence to the following def of the vreg it uses.
   VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg);
   if (DefI != VRegDefs.end() && DefI->SU != SU)
-    DefI->SU->addPred(SDep(SU, SDep::Anti, 0, Reg));
+    DefI->SU->addPred(SDep(SU, SDep::Anti, Reg));
 }
 
 /// Return true if MI is an instruction we are unable to reason about
 /// (like a call or something with unmodeled side effects).
 static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) {
   if (MI->isCall() || MI->hasUnmodeledSideEffects() ||
-      (MI->hasVolatileMemoryRef() &&
+      (MI->hasOrderedMemoryRef() &&
        (!MI->mayLoad() || !MI->isInvariantLoad(AA))))
     return true;
   return false;
@@ -621,8 +560,7 @@ iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
   // and stop descending.
   if (*Depth > 200 ||
       MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) {
-    SUb->addPred(SDep(SUa, SDep::Order, /*Latency=*/0, /*Reg=*/0,
-                      /*isNormalMemory=*/true));
+    SUb->addPred(SDep(SUa, SDep::MayAliasMem));
     return *Depth;
   }
   // Track current depth.
@@ -653,9 +591,9 @@ static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI,
     if (SU == *I)
       continue;
     if (MIsNeedChainEdge(AA, MFI, SU->getInstr(), (*I)->getInstr())) {
-      unsigned Latency = ((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0;
-      (*I)->addPred(SDep(SU, SDep::Order, Latency, /*Reg=*/0,
-                         /*isNormalMemory=*/true));
+      SDep Dep(SU, SDep::MayAliasMem);
+      Dep.setLatency(((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0);
+      (*I)->addPred(Dep);
     }
     // Now go through all the chain successors and iterate from them.
     // Keep track of visited nodes.
@@ -678,9 +616,11 @@ void addChainDependency (AliasAnalysis *AA, const MachineFrameInfo *MFI,
   // If this is a false dependency,
   // do not add the edge, but rememeber the rejected node.
   if (!EnableAASchedMI ||
-      MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr()))
-    SUb->addPred(SDep(SUa, SDep::Order, TrueMemOrderLatency, /*Reg=*/0,
-                      isNormalMemory));
+      MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) {
+    SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier);
+    Dep.setLatency(TrueMemOrderLatency);
+    SUb->addPred(Dep);
+  }
   else {
     // Duplicate entries should be ignored.
     RejectList.insert(SUb);
@@ -718,10 +658,7 @@ void ScheduleDAGInstrs::initSUnits() {
     SU->isCommutable = MI->isCommutable();
 
     // Assign the Latency field of SU using target-provided information.
-    if (UnitLatencies)
-      SU->Latency = 1;
-    else
-      computeLatency(SU);
+    SU->Latency = SchedModel.computeInstrLatency(SU->getInstr());
   }
 }
 
@@ -825,16 +762,19 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
       // references, even those that are known to not alias.
       for (std::map<const Value *, SUnit *>::iterator I =
              NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
-        I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+        I->second->addPred(SDep(SU, SDep::Barrier));
       }
       for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
              NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
-        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
-          I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
+        for (unsigned i = 0, e = I->second.size(); i != e; ++i) {
+          SDep Dep(SU, SDep::Barrier);
+          Dep.setLatency(TrueMemOrderLatency);
+          I->second[i]->addPred(Dep);
+        }
       }
       // Add SU to the barrier chain.
       if (BarrierChain)
-        BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+        BarrierChain->addPred(SDep(SU, SDep::Barrier));
       BarrierChain = SU;
       // This is a barrier event that acts as a pivotal node in the DAG,
       // so it is safe to clear list of exposed nodes.
@@ -922,7 +862,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
         // SU and barrier _could_ be reordered, they should not. In addition,
         // we have lost all RejectMemNodes below barrier.
         if (BarrierChain)
-          BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+          BarrierChain->addPred(SDep(SU, SDep::Barrier));
       } else {
         // Treat all other stores conservatively.
         goto new_alias_chain;
@@ -931,10 +871,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
       if (!ExitSU.isPred(SU))
         // Push store's up a bit to avoid them getting in between cmp
         // and branches.
-        ExitSU.addPred(SDep(SU, SDep::Order, 0,
-                            /*Reg=*/0, /*isNormalMemory=*/false,
-                            /*isMustAlias=*/false,
-                            /*isArtificial=*/true));
+        ExitSU.addPred(SDep(SU, SDep::Artificial));
     } else if (MI->mayLoad()) {
       bool MayAlias = true;
       if (MI->isInvariantLoad(AA)) {
@@ -969,7 +906,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
         if (MayAlias && AliasChain)
           addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes);
         if (BarrierChain)
-          BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+          BarrierChain->addPred(SDep(SU, SDep::Barrier));
       }
     }
   }
@@ -982,34 +919,10 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
   PendingLoads.clear();
 }
 
-void ScheduleDAGInstrs::computeLatency(SUnit *SU) {
-  // Compute the latency for the node. We only provide a default for missing
-  // itineraries. Empty itineraries still have latency properties.
-  if (!InstrItins) {
-    SU->Latency = 1;
-
-    // Simplistic target-independent heuristic: assume that loads take
-    // extra time.
-    if (SU->getInstr()->mayLoad())
-      SU->Latency += 2;
-  } else {
-    SU->Latency = TII->getInstrLatency(InstrItins, SU->getInstr());
-  }
-}
-
-unsigned ScheduleDAGInstrs::computeOperandLatency(SUnit *Def, SUnit *Use,
-                                                  const SDep& dep,
-                                                  bool FindMin) const {
-  // For a data dependency with a known register...
-  if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0))
-    return 1;
-
-  return TII->computeOperandLatency(InstrItins, TRI, Def->getInstr(),
-                                    Use->getInstr(), dep.getReg(), FindMin);
-}
-
 void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   SU->getInstr()->dump();
+#endif
 }
 
 std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
@@ -1029,3 +942,94 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
 std::string ScheduleDAGInstrs::getDAGName() const {
   return "dag." + BB->getFullName();
 }
+
+namespace {
+/// \brief Manage the stack used by a reverse depth-first search over the DAG.
+class SchedDAGReverseDFS {
+  std::vector<std::pair<const SUnit*, SUnit::const_pred_iterator> > DFSStack;
+public:
+  bool isComplete() const { return DFSStack.empty(); }
+
+  void follow(const SUnit *SU) {
+    DFSStack.push_back(std::make_pair(SU, SU->Preds.begin()));
+  }
+  void advance() { ++DFSStack.back().second; }
+
+  void backtrack() { DFSStack.pop_back(); }
+
+  const SUnit *getCurr() const { return DFSStack.back().first; }
+
+  SUnit::const_pred_iterator getPred() const { return DFSStack.back().second; }
+
+  SUnit::const_pred_iterator getPredEnd() const {
+    return getCurr()->Preds.end();
+  }
+};
+} // anonymous
+
+void ScheduleDAGILP::resize(unsigned NumSUnits) {
+  ILPValues.resize(NumSUnits);
+}
+
+ILPValue ScheduleDAGILP::getILP(const SUnit *SU) {
+  return ILPValues[SU->NodeNum];
+}
+
+// A leaf node has an ILP of 1/1.
+static ILPValue initILP(const SUnit *SU) {
+  unsigned Cnt = SU->getInstr()->isTransient() ? 0 : 1;
+  return ILPValue(Cnt, 1 + SU->getDepth());
+}
+
+/// Compute an ILP metric for all nodes in the subDAG reachable via depth-first
+/// search from this root.
+void ScheduleDAGILP::computeILP(const SUnit *Root) {
+  if (!IsBottomUp)
+    llvm_unreachable("Top-down ILP metric is unimplemnted");
+
+  SchedDAGReverseDFS DFS;
+  // Mark a node visited by validating it.
+  ILPValues[Root->NodeNum] = initILP(Root);
+  DFS.follow(Root);
+  for (;;) {
+    // Traverse the leftmost path as far as possible.
+    while (DFS.getPred() != DFS.getPredEnd()) {
+      const SUnit *PredSU = DFS.getPred()->getSUnit();
+      DFS.advance();
+      // If the pred is already valid, skip it.
+      if (ILPValues[PredSU->NodeNum].isValid())
+        continue;
+      ILPValues[PredSU->NodeNum] = initILP(PredSU);
+      DFS.follow(PredSU);
+    }
+    // Visit the top of the stack in postorder and backtrack.
+    unsigned PredCount = ILPValues[DFS.getCurr()->NodeNum].InstrCount;
+    DFS.backtrack();
+    if (DFS.isComplete())
+      break;
+    // Add the recently finished predecessor's bottom-up descendent count.
+    ILPValues[DFS.getCurr()->NodeNum].InstrCount += PredCount;
+  }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void ILPValue::print(raw_ostream &OS) const {
+  if (!isValid())
+    OS << "BADILP";
+  OS << InstrCount << " / " << Cycles << " = "
+     << format("%g", ((double)InstrCount / Cycles));
+}
+
+void ILPValue::dump() const {
+  dbgs() << *this << '\n';
+}
+
+namespace llvm {
+
+raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) {
+  Val.print(OS);
+  return OS;
+}
+
+} // namespace llvm
+#endif // !NDEBUG || LLVM_ENABLE_DUMP
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
index 38feee9..6e781b1 100644
--- a/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -12,7 +12,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Constants.h"
-#include "llvm/Function.h"
 #include "llvm/Assembly/Writer.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
@@ -35,7 +34,7 @@ namespace llvm {
   DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
 
     static std::string getGraphName(const ScheduleDAG *G) {
-      return G->MF.getFunction()->getName();
+      return G->MF.getName();
     }
 
     static bool renderGraphFromBottomUp() {
diff --git a/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
index e675366..2cd84d6 100644
--- a/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
+++ b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
@@ -89,6 +89,7 @@ void ScoreboardHazardRecognizer::Reset() {
   ReservedScoreboard.reset();
 }
 
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void ScoreboardHazardRecognizer::Scoreboard::dump() const {
   dbgs() << "Scoreboard:\n";
 
@@ -104,6 +105,7 @@ void ScoreboardHazardRecognizer::Scoreboard::dump() const {
     dbgs() << '\n';
   }
 }
+#endif
 
 bool ScoreboardHazardRecognizer::atIssueLimit() const {
   if (IssueWidth == 0)
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 4e29879..37d7731 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -23,7 +23,7 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
@@ -194,6 +194,7 @@ namespace {
     SDValue visitOR(SDNode *N);
     SDValue visitXOR(SDNode *N);
     SDValue SimplifyVBinOp(SDNode *N);
+    SDValue SimplifyVUnaryOp(SDNode *N);
     SDValue visitSHL(SDNode *N);
     SDValue visitSRA(SDNode *N);
     SDValue visitSRL(SDNode *N);
@@ -269,6 +270,8 @@ namespace {
     SDValue ReduceLoadWidth(SDNode *N);
     SDValue ReduceLoadOpStoreWidth(SDNode *N);
     SDValue TransformFPLoadStorePair(SDNode *N);
+    SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
+    SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
 
     SDValue GetDemandedBits(SDValue V, const APInt &Mask);
 
@@ -300,6 +303,11 @@ namespace {
     /// looking for a better chain (aliasing node.)
     SDValue FindBetterChain(SDNode *N, SDValue Chain);
 
+    /// Merge consecutive store operations into a wide store.
+    /// This optimization uses wide integers or vectors when possible.
+    /// \return True if some memory operations were changed.
+    bool MergeConsecutiveStores(StoreSDNode *N);
+
   public:
     DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
       : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
@@ -385,10 +393,6 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
                                const TargetLowering &TLI,
                                const TargetOptions *Options,
                                unsigned Depth = 0) {
-  // No compile time optimizations on this type.
-  if (Op.getValueType() == MVT::ppcf128)
-    return 0;
-
   // fneg is removable even if it has multiple uses.
   if (Op.getOpcode() == ISD::FNEG) return 2;
 
@@ -413,7 +417,7 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
         !TLI.isOperationLegalOrCustom(ISD::FSUB,  Op.getValueType()))
       return 0;
 
-    // fold (fsub (fadd A, B)) -> (fsub (fneg A), B)
+    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
                                     Options, Depth + 1))
       return V;
@@ -1643,7 +1647,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
     return N0.getOperand(0);
   // fold C2-(A+C1) -> (C2-C1)-A
   if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
-    SDValue NewC = DAG.getConstant((N0C->getAPIntValue() - N1C1->getAPIntValue()), VT);
+    SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(),
+                                   VT);
     return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, NewC,
                        N1.getOperand(0));
   }
@@ -2345,16 +2350,19 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
   // we don't want to undo this promotion.
   // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
   // on scalars.
-  if ((N0.getOpcode() == ISD::BITCAST || N0.getOpcode() == ISD::SCALAR_TO_VECTOR)
-      && Level == AfterLegalizeTypes) {
+  if ((N0.getOpcode() == ISD::BITCAST ||
+       N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
+      Level == AfterLegalizeTypes) {
     SDValue In0 = N0.getOperand(0);
     SDValue In1 = N1.getOperand(0);
     EVT In0Ty = In0.getValueType();
     EVT In1Ty = In1.getValueType();
-    // If both incoming values are integers, and the original types are the same.
+    DebugLoc DL = N->getDebugLoc();
+    // If both incoming values are integers, and the original types are the
+    // same.
     if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
-      SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), In0Ty, In0, In1);
-      SDValue BC = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, Op);
+      SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
+      SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
       AddToWorkList(Op.getNode());
       return BC;
     }
@@ -2496,8 +2504,18 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
         // lanes of the constant together.
         EVT VT = Vector->getValueType(0);
         unsigned BitWidth = VT.getVectorElementType().getSizeInBits();
+
+        // If the splat value has been compressed to a bitlength lower
+        // than the size of the vector lane, we need to re-expand it to
+        // the lane size.
+        if (BitWidth > SplatBitSize)
+          for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
+               SplatBitSize < BitWidth;
+               SplatBitSize = SplatBitSize * 2)
+            SplatValue |= SplatValue.shl(SplatBitSize);
+
         Constant = APInt::getAllOnesValue(BitWidth);
-        for (unsigned i = 0, n = VT.getVectorNumElements(); i < n; ++i)
+        for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
           Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
       }
     }
@@ -2984,7 +3002,7 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
   SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT));
   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
     return DAG.getNode(ISD::ROTL, N->getDebugLoc(), VT, BSwap, ShAmt);
-  else if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
+  if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
     return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, BSwap, ShAmt);
   return DAG.getNode(ISD::OR, N->getDebugLoc(), VT,
                      DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, BSwap, ShAmt),
@@ -3202,11 +3220,8 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) {
     if ((LShVal + RShVal) != OpSizeInBits)
       return 0;
 
-    SDValue Rot;
-    if (HasROTL)
-      Rot = DAG.getNode(ISD::ROTL, DL, VT, LHSShiftArg, LHSShiftAmt);
-    else
-      Rot = DAG.getNode(ISD::ROTR, DL, VT, LHSShiftArg, RHSShiftAmt);
+    SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
+                              LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
 
     // If there is an AND of either shifted operand, apply it to the result.
     if (LHSMask.getNode() || RHSMask.getNode()) {
@@ -3239,12 +3254,8 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) {
     if (ConstantSDNode *SUBC =
           dyn_cast<ConstantSDNode>(RHSShiftAmt.getOperand(0))) {
       if (SUBC->getAPIntValue() == OpSizeInBits) {
-        if (HasROTL)
-          return DAG.getNode(ISD::ROTL, DL, VT,
-                             LHSShiftArg, LHSShiftAmt).getNode();
-        else
-          return DAG.getNode(ISD::ROTR, DL, VT,
-                             LHSShiftArg, RHSShiftAmt).getNode();
+        return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
+                           HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
       }
     }
   }
@@ -3256,25 +3267,21 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) {
     if (ConstantSDNode *SUBC =
           dyn_cast<ConstantSDNode>(LHSShiftAmt.getOperand(0))) {
       if (SUBC->getAPIntValue() == OpSizeInBits) {
-        if (HasROTR)
-          return DAG.getNode(ISD::ROTR, DL, VT,
-                             LHSShiftArg, RHSShiftAmt).getNode();
-        else
-          return DAG.getNode(ISD::ROTL, DL, VT,
-                             LHSShiftArg, LHSShiftAmt).getNode();
+        return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg,
+                           HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
       }
     }
   }
 
   // Look for sign/zext/any-extended or truncate cases:
-  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND
-       || LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND
-       || LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND
-       || LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
-      (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND
-       || RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND
-       || RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND
-       || RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
+  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
+       LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
+       LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
+       LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
+      (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
+       RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
+       RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
+       RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
     SDValue LExtOp0 = LHSShiftAmt.getOperand(0);
     SDValue RExtOp0 = RHSShiftAmt.getOperand(0);
     if (RExtOp0.getOpcode() == ISD::SUB &&
@@ -4046,7 +4053,8 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
   if (VT.isInteger() &&
       (VT0 == MVT::i1 ||
        (VT0.isInteger() &&
-        TLI.getBooleanContents(false) == TargetLowering::ZeroOrOneBooleanContent)) &&
+        TLI.getBooleanContents(false) ==
+        TargetLowering::ZeroOrOneBooleanContent)) &&
       N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) {
     SDValue XORNode;
     if (VT == VT0)
@@ -4412,20 +4420,18 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
       // If the desired elements are smaller or larger than the source
       // elements we can use a matching integer vector type and then
       // truncate/sign extend
-      else {
-        EVT MatchingElementType =
-          EVT::getIntegerVT(*DAG.getContext(),
-                            N0VT.getScalarType().getSizeInBits());
-        EVT MatchingVectorType =
-          EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
-                           N0VT.getVectorNumElements());
+      EVT MatchingElementType =
+        EVT::getIntegerVT(*DAG.getContext(),
+                          N0VT.getScalarType().getSizeInBits());
+      EVT MatchingVectorType =
+        EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
+                         N0VT.getVectorNumElements());
 
-        if (SVT == MatchingVectorType) {
-          SDValue VsetCC = DAG.getSetCC(N->getDebugLoc(), MatchingVectorType,
-                                 N0.getOperand(0), N0.getOperand(1),
-                                 cast<CondCodeSDNode>(N0.getOperand(2))->get());
-          return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
-        }
+      if (SVT == MatchingVectorType) {
+        SDValue VsetCC = DAG.getSetCC(N->getDebugLoc(), MatchingVectorType,
+                               N0.getOperand(0), N0.getOperand(1),
+                               cast<CondCodeSDNode>(N0.getOperand(2))->get());
+        return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
       }
     }
 
@@ -5235,13 +5241,12 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
       // if the source is smaller than the dest, we still need an extend
       return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
                          N0.getOperand(0));
-    else if (N0.getOperand(0).getValueType().bitsGT(VT))
+    if (N0.getOperand(0).getValueType().bitsGT(VT))
       // if the source is larger than the dest, than we just need the truncate
       return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
-    else
-      // if the source and dest are the same type, we can drop both the extend
-      // and the truncate.
-      return N0.getOperand(0);
+    // if the source and dest are the same type, we can drop both the extend
+    // and the truncate.
+    return N0.getOperand(0);
   }
 
   // Fold extract-and-trunc into a narrow extract. For example:
@@ -5301,6 +5306,48 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
     if (Reduced.getNode())
       return Reduced;
   }
+  // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
+  // where ... are all 'undef'.
+  if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
+    SmallVector<EVT, 8> VTs;
+    SDValue V;
+    unsigned Idx = 0;
+    unsigned NumDefs = 0;
+
+    for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
+      SDValue X = N0.getOperand(i);
+      if (X.getOpcode() != ISD::UNDEF) {
+        V = X;
+        Idx = i;
+        NumDefs++;
+      }
+      // Stop if more than one members are non-undef.
+      if (NumDefs > 1)
+        break;
+      VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
+                                     VT.getVectorElementType(),
+                                     X.getValueType().getVectorNumElements()));
+    }
+
+    if (NumDefs == 0)
+      return DAG.getUNDEF(VT);
+
+    if (NumDefs == 1) {
+      assert(V.getNode() && "The single defined operand is empty!");
+      SmallVector<SDValue, 8> Opnds;
+      for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
+        if (i != Idx) {
+          Opnds.push_back(DAG.getUNDEF(VTs[i]));
+          continue;
+        }
+        SDValue NV = DAG.getNode(ISD::TRUNCATE, V.getDebugLoc(), VTs[i], V);
+        AddToWorkList(NV.getNode());
+        Opnds.push_back(NV);
+      }
+      return DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
+                         &Opnds[0], Opnds.size());
+    }
+  }
 
   // Simplify the operands using demanded-bits information.
   if (!VT.isVector() &&
@@ -5338,7 +5385,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
       !LD2->isVolatile() &&
       DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) {
     unsigned Align = LD1->getAlignment();
-    unsigned NewAlign = TLI.getTargetData()->
+    unsigned NewAlign = TLI.getDataLayout()->
       getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
 
     if (NewAlign <= Align &&
@@ -5407,7 +5454,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
       !cast<LoadSDNode>(N0)->isVolatile() &&
       (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) {
     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
-    unsigned Align = TLI.getTargetData()->
+    unsigned Align = TLI.getDataLayout()->
       getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
     unsigned OrigAlign = LN0->getAlignment();
 
@@ -5430,7 +5477,8 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
   // This often reduces constant pool loads.
   if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(VT)) ||
        (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(VT))) &&
-      N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector()) {
+      N0.getNode()->hasOneUse() && VT.isInteger() &&
+      !VT.isVector() && !N0.getValueType().isVector()) {
     SDValue NewConv = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), VT,
                                   N0.getOperand(0));
     AddToWorkList(NewConv.getNode());
@@ -5653,7 +5701,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
   }
 
   // fold (fadd c1, c2) -> c1 + c2
-  if (N0CFP && N1CFP && VT != MVT::ppcf128)
+  if (N0CFP && N1CFP)
     return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N1);
   // canonicalize constant to RHS
   if (N0CFP && !N1CFP)
@@ -5664,12 +5712,12 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
     return N0;
   // fold (fadd A, (fneg B)) -> (fsub A, B)
   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
-      isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
+    isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
     return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0,
                        GetNegatedExpression(N1, DAG, LegalOperations));
   // fold (fadd (fneg A), B) -> (fsub B, A)
   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
-      isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
+    isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
     return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1,
                        GetNegatedExpression(N0, DAG, LegalOperations));
 
@@ -5681,6 +5729,139 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
                        DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
                                    N0.getOperand(1), N1));
 
+  // If allow, fold (fadd (fneg x), x) -> 0.0
+  if (DAG.getTarget().Options.UnsafeFPMath &&
+      N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) {
+    return DAG.getConstantFP(0.0, VT);
+  }
+
+    // If allow, fold (fadd x, (fneg x)) -> 0.0
+  if (DAG.getTarget().Options.UnsafeFPMath &&
+      N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) {
+    return DAG.getConstantFP(0.0, VT);
+  }
+
+  // In unsafe math mode, we can fold chains of FADD's of the same value
+  // into multiplications.  This transform is not safe in general because
+  // we are reducing the number of rounding steps.
+  if (DAG.getTarget().Options.UnsafeFPMath &&
+      TLI.isOperationLegalOrCustom(ISD::FMUL, VT) &&
+      !N0CFP && !N1CFP) {
+    if (N0.getOpcode() == ISD::FMUL) {
+      ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
+      ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
+
+      // (fadd (fmul c, x), x) -> (fmul c+1, x)
+      if (CFP00 && !CFP01 && N0.getOperand(1) == N1) {
+        SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+                                     SDValue(CFP00, 0),
+                                     DAG.getConstantFP(1.0, VT));
+        return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+                           N1, NewCFP);
+      }
+
+      // (fadd (fmul x, c), x) -> (fmul c+1, x)
+      if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
+        SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+                                     SDValue(CFP01, 0),
+                                     DAG.getConstantFP(1.0, VT));
+        return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+                           N1, NewCFP);
+      }
+
+      // (fadd (fadd x, x), x) -> (fmul 3.0, x)
+      if (!CFP00 && !CFP01 && N0.getOperand(0) == N0.getOperand(1) &&
+          N0.getOperand(0) == N1) {
+        return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+                           N1, DAG.getConstantFP(3.0, VT));
+      }
+
+      // (fadd (fmul c, x), (fadd x, x)) -> (fmul c+2, x)
+      if (CFP00 && !CFP01 && N1.getOpcode() == ISD::FADD &&
+          N1.getOperand(0) == N1.getOperand(1) &&
+          N0.getOperand(1) == N1.getOperand(0)) {
+        SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+                                     SDValue(CFP00, 0),
+                                     DAG.getConstantFP(2.0, VT));
+        return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+                           N0.getOperand(1), NewCFP);
+      }
+
+      // (fadd (fmul x, c), (fadd x, x)) -> (fmul c+2, x)
+      if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
+          N1.getOperand(0) == N1.getOperand(1) &&
+          N0.getOperand(0) == N1.getOperand(0)) {
+        SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+                                     SDValue(CFP01, 0),
+                                     DAG.getConstantFP(2.0, VT));
+        return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+                           N0.getOperand(0), NewCFP);
+      }
+    }
+
+    if (N1.getOpcode() == ISD::FMUL) {
+      ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
+      ConstantFPSDNode *CFP11 = dyn_cast<ConstantFPSDNode>(N1.getOperand(1));
+
+      // (fadd x, (fmul c, x)) -> (fmul c+1, x)
+      if (CFP10 && !CFP11 && N1.getOperand(1) == N0) {
+        SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+                                     SDValue(CFP10, 0),
+                                     DAG.getConstantFP(1.0, VT));
+        return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+                           N0, NewCFP);
+      }
+
+      // (fadd x, (fmul x, c)) -> (fmul c+1, x)
+      if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
+        SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+                                     SDValue(CFP11, 0),
+                                     DAG.getConstantFP(1.0, VT));
+        return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+                           N0, NewCFP);
+      }
+
+      // (fadd x, (fadd x, x)) -> (fmul 3.0, x)
+      if (!CFP10 && !CFP11 && N1.getOperand(0) == N1.getOperand(1) &&
+          N1.getOperand(0) == N0) {
+        return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+                           N0, DAG.getConstantFP(3.0, VT));
+      }
+
+      // (fadd (fadd x, x), (fmul c, x)) -> (fmul c+2, x)
+      if (CFP10 && !CFP11 && N1.getOpcode() == ISD::FADD &&
+          N1.getOperand(0) == N1.getOperand(1) &&
+          N0.getOperand(1) == N1.getOperand(0)) {
+        SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+                                     SDValue(CFP10, 0),
+                                     DAG.getConstantFP(2.0, VT));
+        return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+                           N0.getOperand(1), NewCFP);
+      }
+
+      // (fadd (fadd x, x), (fmul x, c)) -> (fmul c+2, x)
+      if (CFP11 && !CFP10 && N1.getOpcode() == ISD::FADD &&
+          N1.getOperand(0) == N1.getOperand(1) &&
+          N0.getOperand(0) == N1.getOperand(0)) {
+        SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+                                     SDValue(CFP11, 0),
+                                     DAG.getConstantFP(2.0, VT));
+        return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+                           N0.getOperand(0), NewCFP);
+      }
+    }
+
+    // (fadd (fadd x, x), (fadd x, x)) -> (fmul 4.0, x)
+    if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
+        N0.getOperand(0) == N0.getOperand(1) &&
+        N1.getOperand(0) == N1.getOperand(1) &&
+        N0.getOperand(0) == N1.getOperand(0)) {
+      return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+                         N0.getOperand(0),
+                         DAG.getConstantFP(4.0, VT));
+    }
+  }
+
   // FADD -> FMA combines:
   if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
        DAG.getTarget().Options.UnsafeFPMath) &&
@@ -5692,8 +5873,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
       return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
                          N0.getOperand(0), N0.getOperand(1), N1);
     }
-  
-    // fold (fadd x, (fmul y, z)) -> (fma x, y, z)
+
+    // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
     // Note: Commutes FADD operands.
     if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) {
       return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
@@ -5719,7 +5900,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
   }
 
   // fold (fsub c1, c2) -> c1-c2
-  if (N0CFP && N1CFP && VT != MVT::ppcf128)
+  if (N0CFP && N1CFP)
     return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, N1);
   // fold (fsub A, 0) -> A
   if (DAG.getTarget().Options.UnsafeFPMath &&
@@ -5811,7 +5992,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
   }
 
   // fold (fmul c1, c2) -> c1*c2
-  if (N0CFP && N1CFP && VT != MVT::ppcf128)
+  if (N0CFP && N1CFP)
     return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, N1);
   // canonicalize constant to RHS
   if (N0CFP && !N1CFP)
@@ -5867,7 +6048,14 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
   EVT VT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
 
+  if (DAG.getTarget().Options.UnsafeFPMath) {
+    if (N0CFP && N0CFP->isZero())
+      return N2;
+    if (N1CFP && N1CFP->isZero())
+      return N2;
+  }
   if (N0CFP && N0CFP->isExactlyValue(1.0))
     return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N2);
   if (N1CFP && N1CFP->isExactlyValue(1.0))
@@ -5877,6 +6065,58 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
   if (N0CFP && !N1CFP)
     return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, N1, N0, N2);
 
+  // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
+  if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
+      N2.getOpcode() == ISD::FMUL &&
+      N0 == N2.getOperand(0) &&
+      N2.getOperand(1).getOpcode() == ISD::ConstantFP) {
+    return DAG.getNode(ISD::FMUL, dl, VT, N0,
+                       DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1)));
+  }
+
+
+  // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
+  if (DAG.getTarget().Options.UnsafeFPMath &&
+      N0.getOpcode() == ISD::FMUL && N1CFP &&
+      N0.getOperand(1).getOpcode() == ISD::ConstantFP) {
+    return DAG.getNode(ISD::FMA, dl, VT,
+                       N0.getOperand(0),
+                       DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1)),
+                       N2);
+  }
+
+  // (fma x, 1, y) -> (fadd x, y)
+  // (fma x, -1, y) -> (fadd (fneg x), y)
+  if (N1CFP) {
+    if (N1CFP->isExactlyValue(1.0))
+      return DAG.getNode(ISD::FADD, dl, VT, N0, N2);
+
+    if (N1CFP->isExactlyValue(-1.0) &&
+        (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
+      SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0);
+      AddToWorkList(RHSNeg.getNode());
+      return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg);
+    }
+  }
+
+  // (fma x, c, x) -> (fmul x, (c+1))
+  if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N0 == N2) {
+    return DAG.getNode(ISD::FMUL, dl, VT,
+                       N0,
+                       DAG.getNode(ISD::FADD, dl, VT,
+                                   N1, DAG.getConstantFP(1.0, VT)));
+  }
+
+  // (fma x, c, (fneg x)) -> (fmul x, (c-1))
+  if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
+      N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
+    return DAG.getNode(ISD::FMUL, dl, VT,
+                       N0,
+                       DAG.getNode(ISD::FADD, dl, VT,
+                                   N1, DAG.getConstantFP(-1.0, VT)));
+  }
+
+
   return SDValue();
 }
 
@@ -5895,11 +6135,11 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
   }
 
   // fold (fdiv c1, c2) -> c1/c2
-  if (N0CFP && N1CFP && VT != MVT::ppcf128)
+  if (N0CFP && N1CFP)
     return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, N0, N1);
 
   // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
-  if (N1CFP && VT != MVT::ppcf128 && DAG.getTarget().Options.UnsafeFPMath) {
+  if (N1CFP && DAG.getTarget().Options.UnsafeFPMath) {
     // Compute the reciprocal 1.0 / c2.
     APFloat N1APF = N1CFP->getValueAPF();
     APFloat Recip(N1APF.getSemantics(), 1); // 1.0
@@ -5942,7 +6182,7 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
   EVT VT = N->getValueType(0);
 
   // fold (frem c1, c2) -> fmod(c1,c2)
-  if (N0CFP && N1CFP && VT != MVT::ppcf128)
+  if (N0CFP && N1CFP)
     return DAG.getNode(ISD::FREM, N->getDebugLoc(), VT, N0, N1);
 
   return SDValue();
@@ -5955,7 +6195,7 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
   EVT VT = N->getValueType(0);
 
-  if (N0CFP && N1CFP && VT != MVT::ppcf128)  // Constant fold
+  if (N0CFP && N1CFP)  // Constant fold
     return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, N0, N1);
 
   if (N1CFP) {
@@ -6005,7 +6245,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
   EVT OpVT = N0.getValueType();
 
   // fold (sint_to_fp c1) -> c1fp
-  if (N0C && OpVT != MVT::ppcf128 &&
+  if (N0C &&
       // ...but only if the target supports immediate floating-point values
       (!LegalOperations ||
        TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
@@ -6062,7 +6302,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
   EVT OpVT = N0.getValueType();
 
   // fold (uint_to_fp c1) -> c1fp
-  if (N0C && OpVT != MVT::ppcf128 &&
+  if (N0C &&
       // ...but only if the target supports immediate floating-point values
       (!LegalOperations ||
        TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
@@ -6117,7 +6357,7 @@ SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
   EVT VT = N->getValueType(0);
 
   // fold (fp_to_uint c1fp) -> c1
-  if (N0CFP && VT != MVT::ppcf128)
+  if (N0CFP)
     return DAG.getNode(ISD::FP_TO_UINT, N->getDebugLoc(), VT, N0);
 
   return SDValue();
@@ -6130,7 +6370,7 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
   EVT VT = N->getValueType(0);
 
   // fold (fp_round c1fp) -> c1fp
-  if (N0CFP && N0.getValueType() != MVT::ppcf128)
+  if (N0CFP)
     return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0, N1);
 
   // fold (fp_round (fp_extend x)) -> x
@@ -6184,7 +6424,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
     return SDValue();
 
   // fold (fp_extend c1fp) -> c1fp
-  if (N0CFP && VT != MVT::ppcf128)
+  if (N0CFP)
     return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, N0);
 
   // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
@@ -6225,6 +6465,11 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   EVT VT = N->getValueType(0);
 
+  if (VT.isVector()) {
+    SDValue FoldedVOp = SimplifyVUnaryOp(N);
+    if (FoldedVOp.getNode()) return FoldedVOp;
+  }
+
   if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
                          &DAG.getTarget().Options))
     return GetNegatedExpression(N0, DAG, LegalOperations);
@@ -6246,6 +6491,17 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
     }
   }
 
+  // (fneg (fmul c, x)) -> (fmul -c, x)
+  if (N0.getOpcode() == ISD::FMUL) {
+    ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
+    if (CFP1) {
+      return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+                         N0.getOperand(0),
+                         DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT,
+                                     N0.getOperand(1)));
+    }
+  }
+
   return SDValue();
 }
 
@@ -6255,7 +6511,7 @@ SDValue DAGCombiner::visitFCEIL(SDNode *N) {
   EVT VT = N->getValueType(0);
 
   // fold (fceil c1) -> fceil(c1)
-  if (N0CFP && VT != MVT::ppcf128)
+  if (N0CFP)
     return DAG.getNode(ISD::FCEIL, N->getDebugLoc(), VT, N0);
 
   return SDValue();
@@ -6267,7 +6523,7 @@ SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
   EVT VT = N->getValueType(0);
 
   // fold (ftrunc c1) -> ftrunc(c1)
-  if (N0CFP && VT != MVT::ppcf128)
+  if (N0CFP)
     return DAG.getNode(ISD::FTRUNC, N->getDebugLoc(), VT, N0);
 
   return SDValue();
@@ -6279,7 +6535,7 @@ SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
   EVT VT = N->getValueType(0);
 
   // fold (ffloor c1) -> ffloor(c1)
-  if (N0CFP && VT != MVT::ppcf128)
+  if (N0CFP)
     return DAG.getNode(ISD::FFLOOR, N->getDebugLoc(), VT, N0);
 
   return SDValue();
@@ -6290,8 +6546,13 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   EVT VT = N->getValueType(0);
 
+  if (VT.isVector()) {
+    SDValue FoldedVOp = SimplifyVUnaryOp(N);
+    if (FoldedVOp.getNode()) return FoldedVOp;
+  }
+
   // fold (fabs c1) -> fabs(c1)
-  if (N0CFP && VT != MVT::ppcf128)
+  if (N0CFP)
     return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);
   // fold (fabs (fabs x)) -> (fabs x)
   if (N0.getOpcode() == ISD::FABS)
@@ -6511,7 +6772,7 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
   } else
     return false;
 
-  TargetLowering::AddrMode AM;
+  AddrMode AM;
   if (N->getOpcode() == ISD::ADD) {
     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
     if (Offset)
@@ -7138,7 +7399,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
 
       unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
       Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
-      if (NewAlign < TLI.getTargetData()->getABITypeAlignment(NewVTTy))
+      if (NewAlign < TLI.getDataLayout()->getABITypeAlignment(NewVTTy))
         return SDValue();
 
       SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(),
@@ -7200,7 +7461,7 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
     unsigned LDAlign = LD->getAlignment();
     unsigned STAlign = ST->getAlignment();
     Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
-    unsigned ABIAlign = TLI.getTargetData()->getABITypeAlignment(IntVTTy);
+    unsigned ABIAlign = TLI.getDataLayout()->getABITypeAlignment(IntVTTy);
     if (LDAlign < ABIAlign || STAlign < ABIAlign)
       return SDValue();
 
@@ -7225,6 +7486,433 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
   return SDValue();
 }
 
+/// Returns the base pointer and an integer offset from that object.
+static std::pair<SDValue, int64_t> GetPointerBaseAndOffset(SDValue Ptr) {
+  if (Ptr->getOpcode() == ISD::ADD && isa<ConstantSDNode>(Ptr->getOperand(1))) {
+    int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
+    SDValue Base = Ptr->getOperand(0);
+    return std::make_pair(Base, Offset);
+  }
+
+  return std::make_pair(Ptr, 0);
+}
+
+/// Holds a pointer to an LSBaseSDNode as well as information on where it
+/// is located in a sequence of memory operations connected by a chain.
+struct MemOpLink {
+  MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq):
+    MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
+  // Ptr to the mem node.
+  LSBaseSDNode *MemNode;
+  // Offset from the base ptr.
+  int64_t OffsetFromBase;
+  // What is the sequence number of this mem node.
+  // Lowest mem operand in the DAG starts at zero.
+  unsigned SequenceNum;
+};
+
+/// Sorts store nodes in a link according to their offset from a shared
+// base ptr.
+struct ConsecutiveMemoryChainSorter {
+  bool operator()(MemOpLink LHS, MemOpLink RHS) {
+    return LHS.OffsetFromBase < RHS.OffsetFromBase;
+  }
+};
+
+bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
+  EVT MemVT = St->getMemoryVT();
+  int64_t ElementSizeBytes = MemVT.getSizeInBits()/8;
+
+  // Don't merge vectors into wider inputs.
+  if (MemVT.isVector() || !MemVT.isSimple())
+    return false;
+
+  // Perform an early exit check. Do not bother looking at stored values that
+  // are not constants or loads.
+  SDValue StoredVal = St->getValue();
+  bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
+  if (!isa<ConstantSDNode>(StoredVal) && !isa<ConstantFPSDNode>(StoredVal) &&
+      !IsLoadSrc)
+    return false;
+
+  // Only look at ends of store sequences.
+  SDValue Chain = SDValue(St, 1);
+  if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE)
+    return false;
+
+  // This holds the base pointer and the offset in bytes from the base pointer.
+  std::pair<SDValue, int64_t> BasePtr =
+      GetPointerBaseAndOffset(St->getBasePtr());
+
+  // We must have a base and an offset.
+  if (!BasePtr.first.getNode())
+    return false;
+
+  // Do not handle stores to undef base pointers.
+  if (BasePtr.first.getOpcode() == ISD::UNDEF)
+    return false;
+
+  SmallVector<MemOpLink, 8> StoreNodes;
+  // Walk up the chain and look for nodes with offsets from the same
+  // base pointer. Stop when reaching an instruction with a different kind
+  // or instruction which has a different base pointer.
+  unsigned Seq = 0;
+  StoreSDNode *Index = St;
+  while (Index) {
+    // If the chain has more than one use, then we can't reorder the mem ops.
+    if (Index != St && !SDValue(Index, 1)->hasOneUse())
+      break;
+
+    // Find the base pointer and offset for this memory node.
+    std::pair<SDValue, int64_t> Ptr =
+      GetPointerBaseAndOffset(Index->getBasePtr());
+
+    // Check that the base pointer is the same as the original one.
+    if (Ptr.first.getNode() != BasePtr.first.getNode())
+      break;
+
+    // Check that the alignment is the same.
+    if (Index->getAlignment() != St->getAlignment())
+      break;
+
+    // The memory operands must not be volatile.
+    if (Index->isVolatile() || Index->isIndexed())
+      break;
+
+    // No truncation.
+    if (StoreSDNode *St = dyn_cast<StoreSDNode>(Index))
+      if (St->isTruncatingStore())
+        break;
+
+    // The stored memory type must be the same.
+    if (Index->getMemoryVT() != MemVT)
+      break;
+
+    // We do not allow unaligned stores because we want to prevent overriding
+    // stores.
+    if (Index->getAlignment()*8 != MemVT.getSizeInBits())
+      break;
+
+    // We found a potential memory operand to merge.
+    StoreNodes.push_back(MemOpLink(Index, Ptr.second, Seq++));
+
+    // Move up the chain to the next memory operation.
+    Index = dyn_cast<StoreSDNode>(Index->getChain().getNode());
+  }
+
+  // Check if there is anything to merge.
+  if (StoreNodes.size() < 2)
+    return false;
+
+  // Sort the memory operands according to their distance from the base pointer.
+  std::sort(StoreNodes.begin(), StoreNodes.end(),
+            ConsecutiveMemoryChainSorter());
+
+  // Scan the memory operations on the chain and find the first non-consecutive
+  // store memory address.
+  unsigned LastConsecutiveStore = 0;
+  int64_t StartAddress = StoreNodes[0].OffsetFromBase;
+  for (unsigned i=1; i<StoreNodes.size(); ++i) {
+    int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
+    if (CurrAddress - StartAddress != (ElementSizeBytes * i))
+      break;
+
+    // Mark this node as useful.
+    LastConsecutiveStore = i;
+  }
+
+  // The node with the lowest store address.
+  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+
+  // Store the constants into memory as one consecutive store.
+  if (!IsLoadSrc) {
+    unsigned LastLegalType = 0;
+    unsigned LastLegalVectorType = 0;
+    bool NonZero = false;
+    for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
+      StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[i].MemNode);
+      SDValue StoredVal = St->getValue();
+
+      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) {
+        NonZero |= !C->isNullValue();
+      } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) {
+        NonZero |= !C->getConstantFPValue()->isNullValue();
+      } else {
+        // Non constant.
+        break;
+      }
+
+      // Find a legal type for the constant store.
+      unsigned StoreBW = (i+1) * ElementSizeBytes * 8;
+      EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
+      if (TLI.isTypeLegal(StoreTy))
+        LastLegalType = i+1;
+
+      // Find a legal type for the vector store.
+      EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
+      if (TLI.isTypeLegal(Ty))
+        LastLegalVectorType = i + 1;
+    }
+
+    // We only use vectors if the constant is known to be zero.
+    if (NonZero)
+      LastLegalVectorType = 0;
+
+    // Check if we found a legal integer type to store.
+    if (LastLegalType == 0 && LastLegalVectorType == 0)
+      return false;
+
+    bool UseVector = LastLegalVectorType > LastLegalType;
+    unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType;
+
+    // Make sure we have something to merge.
+    if (NumElem < 2)
+      return false;
+
+    unsigned EarliestNodeUsed = 0;
+    for (unsigned i=0; i < NumElem; ++i) {
+      // Find a chain for the new wide-store operand. Notice that some
+      // of the store nodes that we found may not be selected for inclusion
+      // in the wide store. The chain we use needs to be the chain of the
+      // earliest store node which is *used* and replaced by the wide store.
+      if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum)
+        EarliestNodeUsed = i;
+    }
+
+    // The earliest Node in the DAG.
+    LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
+    DebugLoc DL = StoreNodes[0].MemNode->getDebugLoc();
+
+    SDValue StoredVal;
+    if (UseVector) {
+      // Find a legal type for the vector store.
+      EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
+      assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
+      StoredVal = DAG.getConstant(0, Ty);
+    } else {
+      unsigned StoreBW = NumElem * ElementSizeBytes * 8;
+      APInt StoreInt(StoreBW, 0);
+
+      // Construct a single integer constant which is made of the smaller
+      // constant inputs.
+      bool IsLE = TLI.isLittleEndian();
+      for (unsigned i = 0; i < NumElem ; ++i) {
+        unsigned Idx = IsLE ?(NumElem - 1 - i) : i;
+        StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
+        SDValue Val = St->getValue();
+        StoreInt<<=ElementSizeBytes*8;
+        if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
+          StoreInt|=C->getAPIntValue().zext(StoreBW);
+        } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
+          StoreInt|= C->getValueAPF().bitcastToAPInt().zext(StoreBW);
+        } else {
+          assert(false && "Invalid constant element type");
+        }
+      }
+
+      // Create the new Load and Store operations.
+      EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
+      StoredVal = DAG.getConstant(StoreInt, StoreTy);
+    }
+
+    SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal,
+                                    FirstInChain->getBasePtr(),
+                                    FirstInChain->getPointerInfo(),
+                                    false, false,
+                                    FirstInChain->getAlignment());
+
+    // Replace the first store with the new store
+    CombineTo(EarliestOp, NewStore);
+    // Erase all other stores.
+    for (unsigned i = 0; i < NumElem ; ++i) {
+      if (StoreNodes[i].MemNode == EarliestOp)
+        continue;
+      StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+      // ReplaceAllUsesWith will replace all uses that existed when it was
+      // called, but graph optimizations may cause new ones to appear. For
+      // example, the case in pr14333 looks like
+      //
+      //  St's chain -> St -> another store -> X
+      //
+      // And the only difference from St to the other store is the chain.
+      // When we change it's chain to be St's chain they become identical,
+      // get CSEed and the net result is that X is now a use of St.
+      // Since we know that St is redundant, just iterate.
+      while (!St->use_empty())
+        DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
+      removeFromWorkList(St);
+      DAG.DeleteNode(St);
+    }
+
+    return true;
+  }
+
+  // Below we handle the case of multiple consecutive stores that
+  // come from multiple consecutive loads. We merge them into a single
+  // wide load and a single wide store.
+
+  // Look for load nodes which are used by the stored values.
+  SmallVector<MemOpLink, 8> LoadNodes;
+
+  // Find acceptable loads. Loads need to have the same chain (token factor),
+  // must not be zext, volatile, indexed, and they must be consecutive.
+  SDValue LdBasePtr;
+  for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
+    StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[i].MemNode);
+    LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue());
+    if (!Ld) break;
+
+    // Loads must only have one use.
+    if (!Ld->hasNUsesOfValue(1, 0))
+      break;
+
+    // Check that the alignment is the same as the stores.
+    if (Ld->getAlignment() != St->getAlignment())
+      break;
+
+    // The memory operands must not be volatile.
+    if (Ld->isVolatile() || Ld->isIndexed())
+      break;
+
+    // We do not accept ext loads.
+    if (Ld->getExtensionType() != ISD::NON_EXTLOAD)
+      break;
+
+    // The stored memory type must be the same.
+    if (Ld->getMemoryVT() != MemVT)
+      break;
+
+    std::pair<SDValue, int64_t> LdPtr =
+    GetPointerBaseAndOffset(Ld->getBasePtr());
+
+    // If this is not the first ptr that we check.
+    if (LdBasePtr.getNode()) {
+      // The base ptr must be the same.
+      if (LdPtr.first != LdBasePtr)
+        break;
+    } else {
+      // Check that all other base pointers are the same as this one.
+      LdBasePtr = LdPtr.first;
+    }
+
+    // We found a potential memory operand to merge.
+    LoadNodes.push_back(MemOpLink(Ld, LdPtr.second, 0));
+  }
+
+  if (LoadNodes.size() < 2)
+    return false;
+
+  // Scan the memory operations on the chain and find the first non-consecutive
+  // load memory address. These variables hold the index in the store node
+  // array.
+  unsigned LastConsecutiveLoad = 0;
+  // This variable refers to the size and not index in the array.
+  unsigned LastLegalVectorType = 0;
+  unsigned LastLegalIntegerType = 0;
+  StartAddress = LoadNodes[0].OffsetFromBase;
+  SDValue FirstChain = LoadNodes[0].MemNode->getChain();
+  for (unsigned i = 1; i < LoadNodes.size(); ++i) {
+    // All loads much share the same chain.
+    if (LoadNodes[i].MemNode->getChain() != FirstChain)
+      break;
+    
+    int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
+    if (CurrAddress - StartAddress != (ElementSizeBytes * i))
+      break;
+    LastConsecutiveLoad = i;
+
+    // Find a legal type for the vector store.
+    EVT StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
+    if (TLI.isTypeLegal(StoreTy))
+      LastLegalVectorType = i + 1;
+
+    // Find a legal type for the integer store.
+    unsigned StoreBW = (i+1) * ElementSizeBytes * 8;
+    StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
+    if (TLI.isTypeLegal(StoreTy))
+      LastLegalIntegerType = i + 1;
+  }
+
+  // Only use vector types if the vector type is larger than the integer type.
+  // If they are the same, use integers.
+  bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType;
+  unsigned LastLegalType = std::max(LastLegalVectorType, LastLegalIntegerType);
+
+  // We add +1 here because the LastXXX variables refer to location while
+  // the NumElem refers to array/index size.
+  unsigned NumElem = std::min(LastConsecutiveStore, LastConsecutiveLoad) + 1;
+  NumElem = std::min(LastLegalType, NumElem);
+
+  if (NumElem < 2)
+    return false;
+
+  // The earliest Node in the DAG.
+  unsigned EarliestNodeUsed = 0;
+  LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
+  for (unsigned i=1; i<NumElem; ++i) {
+    // Find a chain for the new wide-store operand. Notice that some
+    // of the store nodes that we found may not be selected for inclusion
+    // in the wide store. The chain we use needs to be the chain of the
+    // earliest store node which is *used* and replaced by the wide store.
+    if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum)
+      EarliestNodeUsed = i;
+  }
+
+  // Find if it is better to use vectors or integers to load and store
+  // to memory.
+  EVT JointMemOpVT;
+  if (UseVectorTy) {
+    JointMemOpVT = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
+  } else {
+    unsigned StoreBW = NumElem * ElementSizeBytes * 8;
+    JointMemOpVT = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
+  }
+
+  DebugLoc LoadDL = LoadNodes[0].MemNode->getDebugLoc();
+  DebugLoc StoreDL = StoreNodes[0].MemNode->getDebugLoc();
+
+  LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
+  SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL,
+                                FirstLoad->getChain(),
+                                FirstLoad->getBasePtr(),
+                                FirstLoad->getPointerInfo(),
+                                false, false, false,
+                                FirstLoad->getAlignment());
+
+  SDValue NewStore = DAG.getStore(EarliestOp->getChain(), StoreDL, NewLoad,
+                                  FirstInChain->getBasePtr(),
+                                  FirstInChain->getPointerInfo(), false, false,
+                                  FirstInChain->getAlignment());
+
+  // Replace one of the loads with the new load.
+  LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[0].MemNode);
+  DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
+                                SDValue(NewLoad.getNode(), 1));
+
+  // Remove the rest of the load chains.
+  for (unsigned i = 1; i < NumElem ; ++i) {
+    // Replace all chain users of the old load nodes with the chain of the new
+    // load node.
+    LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
+    DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Ld->getChain());
+  }
+
+  // Replace the first store with the new store.
+  CombineTo(EarliestOp, NewStore);
+  // Erase all other stores.
+  for (unsigned i = 0; i < NumElem ; ++i) {
+    // Remove all Store nodes.
+    if (StoreNodes[i].MemNode == EarliestOp)
+      continue;
+    StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+    DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
+    removeFromWorkList(St);
+    DAG.DeleteNode(St);
+  }
+
+  return true;
+}
+
 SDValue DAGCombiner::visitSTORE(SDNode *N) {
   StoreSDNode *ST  = cast<StoreSDNode>(N);
   SDValue Chain = ST->getChain();
@@ -7237,7 +7925,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
       ST->isUnindexed()) {
     unsigned OrigAlign = ST->getAlignment();
     EVT SVT = Value.getOperand(0).getValueType();
-    unsigned Align = TLI.getTargetData()->
+    unsigned Align = TLI.getDataLayout()->
       getABITypeAlignment(SVT.getTypeForEVT(*DAG.getContext()));
     if (Align <= OrigAlign &&
         ((!LegalOperations && !ST->isVolatile()) ||
@@ -7426,6 +8114,11 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
                              ST->getAlignment());
   }
 
+  // Only perform this optimization before the types are legal, because we
+  // don't want to perform this optimization on every DAGCombine invocation.
+  if (!LegalTypes && MergeConsecutiveStores(ST))
+    return SDValue(N, 0);
+
   return ReduceLoadOpStoreWidth(N);
 }
 
@@ -7504,9 +8197,9 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
 
   // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
   // We only perform this optimization before the op legalization phase because
-  // we may introduce new vector instructions which are not backed by TD patterns.
-  // For example on AVX, extracting elements from a wide vector without using
-  // extract_subvector.
+  // we may introduce new vector instructions which are not backed by TD
+  // patterns. For example on AVX, extracting elements from a wide vector
+  // without using extract_subvector.
   if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE
       && ConstEltNo && !LegalOperations) {
     int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
@@ -7625,7 +8318,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
       // Check the resultant load doesn't need a higher alignment than the
       // original load.
       unsigned NewAlign =
-        TLI.getTargetData()
+        TLI.getDataLayout()
             ->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));
 
       if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT))
@@ -7690,15 +8383,21 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
   return SDValue();
 }
 
-SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
+// Simplify (build_vec (ext )) to (bitcast (build_vec ))
+SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
+  // We perform this optimization post type-legalization because
+  // the type-legalizer often scalarizes integer-promoted vectors.
+  // Performing this optimization before may create bit-casts which
+  // will be type-legalized to complex code sequences.
+  // We perform this optimization only before the operation legalizer because we
+  // may introduce illegal operations.
+  if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
+    return SDValue();
+
   unsigned NumInScalars = N->getNumOperands();
   DebugLoc dl = N->getDebugLoc();
   EVT VT = N->getValueType(0);
 
-  // A vector built entirely of undefs is undef.
-  if (ISD::allOperandsUndef(N))
-    return DAG.getUNDEF(VT);
-
   // Check to see if this is a BUILD_VECTOR of a bunch of values
   // which come from any_extend or zero_extend nodes. If so, we can create
   // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
@@ -7741,64 +8440,141 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
   // In order to have valid types, all of the inputs must be extended from the
   // same source type and all of the inputs must be any or zero extend.
   // Scalar sizes must be a power of two.
-  EVT OutScalarTy = N->getValueType(0).getScalarType();
+  EVT OutScalarTy = VT.getScalarType();
   bool ValidTypes = SourceType != MVT::Other &&
                  isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
                  isPowerOf2_32(SourceType.getSizeInBits());
 
-  // We perform this optimization post type-legalization because
-  // the type-legalizer often scalarizes integer-promoted vectors.
-  // Performing this optimization before may create bit-casts which
-  // will be type-legalized to complex code sequences.
-  // We perform this optimization only before the operation legalizer because we
-  // may introduce illegal operations.
   // Create a new simpler BUILD_VECTOR sequence which other optimizations can
   // turn into a single shuffle instruction.
-  if ((Level == AfterLegalizeVectorOps || Level == AfterLegalizeTypes) &&
-      ValidTypes) {
-    bool isLE = TLI.isLittleEndian();
-    unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
-    assert(ElemRatio > 1 && "Invalid element size ratio");
-    SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
-                                 DAG.getConstant(0, SourceType);
-
-    unsigned NewBVElems = ElemRatio * N->getValueType(0).getVectorNumElements();
-    SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
-
-    // Populate the new build_vector
-    for (unsigned i=0; i < N->getNumOperands(); ++i) {
-      SDValue Cast = N->getOperand(i);
-      assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
-              Cast.getOpcode() == ISD::ZERO_EXTEND ||
-              Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode");
-      SDValue In;
-      if (Cast.getOpcode() == ISD::UNDEF)
-        In = DAG.getUNDEF(SourceType);
-      else
-        In = Cast->getOperand(0);
-      unsigned Index = isLE ? (i * ElemRatio) :
-                              (i * ElemRatio + (ElemRatio - 1));
+  if (!ValidTypes)
+    return SDValue();
+
+  bool isLE = TLI.isLittleEndian();
+  unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
+  assert(ElemRatio > 1 && "Invalid element size ratio");
+  SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
+                               DAG.getConstant(0, SourceType);
+
+  unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
+  SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
+
+  // Populate the new build_vector
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    SDValue Cast = N->getOperand(i);
+    assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
+            Cast.getOpcode() == ISD::ZERO_EXTEND ||
+            Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode");
+    SDValue In;
+    if (Cast.getOpcode() == ISD::UNDEF)
+      In = DAG.getUNDEF(SourceType);
+    else
+      In = Cast->getOperand(0);
+    unsigned Index = isLE ? (i * ElemRatio) :
+                            (i * ElemRatio + (ElemRatio - 1));
+
+    assert(Index < Ops.size() && "Invalid index");
+    Ops[Index] = In;
+  }
+
+  // The type of the new BUILD_VECTOR node.
+  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
+  assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
+         "Invalid vector size");
+  // Check if the new vector type is legal.
+  if (!isTypeLegal(VecVT)) return SDValue();
+
+  // Make the new BUILD_VECTOR.
+  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], Ops.size());
+
+  // The new BUILD_VECTOR node has the potential to be further optimized.
+  AddToWorkList(BV.getNode());
+  // Bitcast to the desired type.
+  return DAG.getNode(ISD::BITCAST, dl, VT, BV);
+}
+
+SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
+  EVT VT = N->getValueType(0);
+
+  unsigned NumInScalars = N->getNumOperands();
+  DebugLoc dl = N->getDebugLoc();
+
+  EVT SrcVT = MVT::Other;
+  unsigned Opcode = ISD::DELETED_NODE;
+  unsigned NumDefs = 0;
 
-      assert(Index < Ops.size() && "Invalid index");
-      Ops[Index] = In;
+  for (unsigned i = 0; i != NumInScalars; ++i) {
+    SDValue In = N->getOperand(i);
+    unsigned Opc = In.getOpcode();
+
+    if (Opc == ISD::UNDEF)
+      continue;
+
+    // If all scalar values are floats and converted from integers.
+    if (Opcode == ISD::DELETED_NODE &&
+        (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
+      Opcode = Opc;
+      // If not supported by target, bail out.
+      if (TLI.getOperationAction(Opcode, VT) != TargetLowering::Legal &&
+          TLI.getOperationAction(Opcode, VT) != TargetLowering::Custom)
+        return SDValue();
     }
+    if (Opc != Opcode)
+      return SDValue();
 
-    // The type of the new BUILD_VECTOR node.
-    EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
-    assert(VecVT.getSizeInBits() == N->getValueType(0).getSizeInBits() &&
-           "Invalid vector size");
-    // Check if the new vector type is legal.
-    if (!isTypeLegal(VecVT)) return SDValue();
+    EVT InVT = In.getOperand(0).getValueType();
 
-    // Make the new BUILD_VECTOR.
-    SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
-                                 VecVT, &Ops[0], Ops.size());
+    // If all scalar values are typed differently, bail out. It's chosen to
+    // simplify BUILD_VECTOR of integer types.
+    if (SrcVT == MVT::Other)
+      SrcVT = InVT;
+    if (SrcVT != InVT)
+      return SDValue();
+    NumDefs++;
+  }
+
+  // If the vector has just one element defined, it's not worth to fold it into
+  // a vectorized one.
+  if (NumDefs < 2)
+    return SDValue();
 
-    // The new BUILD_VECTOR node has the potential to be further optimized.
-    AddToWorkList(BV.getNode());
-    // Bitcast to the desired type.
-    return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), BV);
+  assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
+         && "Should only handle conversion from integer to float.");
+  assert(SrcVT != MVT::Other && "Cannot determine source type!");
+
+  EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
+  SmallVector<SDValue, 8> Opnds;
+  for (unsigned i = 0; i != NumInScalars; ++i) {
+    SDValue In = N->getOperand(i);
+
+    if (In.getOpcode() == ISD::UNDEF)
+      Opnds.push_back(DAG.getUNDEF(SrcVT));
+    else
+      Opnds.push_back(In.getOperand(0));
   }
+  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT,
+                           &Opnds[0], Opnds.size());
+  AddToWorkList(BV.getNode());
+
+  return DAG.getNode(Opcode, dl, VT, BV);
+}
+
+SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
+  unsigned NumInScalars = N->getNumOperands();
+  DebugLoc dl = N->getDebugLoc();
+  EVT VT = N->getValueType(0);
+
+  // A vector built entirely of undefs is undef.
+  if (ISD::allOperandsUndef(N))
+    return DAG.getUNDEF(VT);
+
+  SDValue V = reduceBuildVecExtToExtBuildVec(N);
+  if (V.getNode())
+    return V;
+
+  V = reduceBuildVecConvertToConvertBuildVec(N);
+  if (V.getNode())
+    return V;
 
   // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
   // operations.  If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
@@ -7876,15 +8652,22 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
       if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits())
         return SDValue();
 
+      // If the input vector type has a different base type to the output
+      // vector type, bail out.
+      if (VecIn1.getValueType().getVectorElementType() !=
+          VT.getVectorElementType())
+        return SDValue();
+
       // Widen the input vector by adding undef values.
-      VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
+      VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
                            VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
     }
 
     // If VecIn2 is unused then change it to undef.
     VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
 
-    // Check that we were able to transform all incoming values to the same type.
+    // Check that we were able to transform all incoming values to the same
+    // type.
     if (VecIn2.getValueType() != VecIn1.getValueType() ||
         VecIn1.getValueType() != VT)
           return SDValue();
@@ -7897,7 +8680,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
     SDValue Ops[2];
     Ops[0] = VecIn1;
     Ops[1] = VecIn2;
-    return DAG.getVectorShuffle(VT, N->getDebugLoc(), Ops[0], Ops[1], &Mask[0]);
+    return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], &Mask[0]);
   }
 
   return SDValue();
@@ -7933,8 +8716,8 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
       return SDValue();
 
     // Only handle cases where both indexes are constants with the same type.
-    ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
-    ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
+    ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
+    ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
 
     if (InsIdx && ExtIdx &&
         InsIdx->getValueType(0).getSizeInBits() <= 64 &&
@@ -7951,6 +8734,21 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
     }
   }
 
+  if (V->getOpcode() == ISD::CONCAT_VECTORS) {
+    // Combine:
+    //    (extract_subvec (concat V1, V2, ...), i)
+    // Into:
+    //    Vi if possible
+    // Only operand 0 is checked as 'concat' assumes all inputs of the same type.
+    if (V->getOperand(0).getValueType() != NVT)
+      return SDValue();
+    unsigned Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+    unsigned NumElems = NVT.getVectorNumElements();
+    assert((Idx % NumElems) == 0 &&
+           "IDX in concat is not a multiple of the result vector length.");
+    return V->getOperand(Idx / NumElems);
+  }
+
   return SDValue();
 }
 
@@ -8266,6 +9064,44 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
   return SDValue();
 }
 
+/// SimplifyVUnaryOp - Visit a binary vector operation, like FABS/FNEG.
+SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) {
+  // After legalize, the target may be depending on adds and other
+  // binary ops to provide legal ways to construct constants or other
+  // things. Simplifying them may result in a loss of legality.
+  if (LegalOperations) return SDValue();
+
+  assert(N->getValueType(0).isVector() &&
+         "SimplifyVUnaryOp only works on vectors!");
+
+  SDValue N0 = N->getOperand(0);
+
+  if (N0.getOpcode() != ISD::BUILD_VECTOR)
+    return SDValue();
+
+  // Operand is a BUILD_VECTOR node, see if we can constant fold it.
+  SmallVector<SDValue, 8> Ops;
+  for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
+    SDValue Op = N0.getOperand(i);
+    if (Op.getOpcode() != ISD::UNDEF &&
+        Op.getOpcode() != ISD::ConstantFP)
+      break;
+    EVT EltVT = Op.getValueType();
+    SDValue FoldOp = DAG.getNode(N->getOpcode(), N0.getDebugLoc(), EltVT, Op);
+    if (FoldOp.getOpcode() != ISD::UNDEF &&
+        FoldOp.getOpcode() != ISD::ConstantFP)
+      break;
+    Ops.push_back(FoldOp);
+    AddToWorkList(FoldOp.getNode());
+  }
+
+  if (Ops.size() != N0.getNumOperands())
+    return SDValue();
+
+  return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+                     N0.getValueType(), &Ops[0], Ops.size());
+}
+
 SDValue DAGCombiner::SimplifySelect(DebugLoc DL, SDValue N0,
                                     SDValue N1, SDValue N2){
   assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
@@ -8349,6 +9185,10 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
       if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
           (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
         return false;
+      // The loads must not depend on one another.
+      if (LLD->isPredecessorOf(RLD) ||
+          RLD->isPredecessorOf(LLD))
+        return false;
       Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(),
                          LLD->getBasePtr().getValueType(),
                          TheSelect->getOperand(0), LLD->getBasePtr(),
@@ -8468,7 +9308,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
           const_cast<ConstantFP*>(TV->getConstantFPValue())
         };
         Type *FPTy = Elts[0]->getType();
-        const TargetData &TD = *TLI.getTargetData();
+        const DataLayout &TD = *TLI.getDataLayout();
 
         // Create a ConstantArray of the two constants.
         Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
@@ -8583,34 +9423,38 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
       return SDValue();
 
     // Get a SetCC of the condition
-    // FIXME: Should probably make sure that setcc is legal if we ever have a
-    // target where it isn't.
-    SDValue Temp, SCC;
-    // cast from setcc result type to select result type
-    if (LegalTypes) {
-      SCC  = DAG.getSetCC(DL, TLI.getSetCCResultType(N0.getValueType()),
-                          N0, N1, CC);
-      if (N2.getValueType().bitsLT(SCC.getValueType()))
-        Temp = DAG.getZeroExtendInReg(SCC, N2.getDebugLoc(), N2.getValueType());
-      else
+    // NOTE: Don't create a SETCC if it's not legal on this target.
+    if (!LegalOperations ||
+        TLI.isOperationLegal(ISD::SETCC,
+          LegalTypes ? TLI.getSetCCResultType(N0.getValueType()) : MVT::i1)) {
+      SDValue Temp, SCC;
+      // cast from setcc result type to select result type
+      if (LegalTypes) {
+        SCC  = DAG.getSetCC(DL, TLI.getSetCCResultType(N0.getValueType()),
+                            N0, N1, CC);
+        if (N2.getValueType().bitsLT(SCC.getValueType()))
+          Temp = DAG.getZeroExtendInReg(SCC, N2.getDebugLoc(),
+                                        N2.getValueType());
+        else
+          Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(),
+                             N2.getValueType(), SCC);
+      } else {
+        SCC  = DAG.getSetCC(N0.getDebugLoc(), MVT::i1, N0, N1, CC);
         Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(),
                            N2.getValueType(), SCC);
-    } else {
-      SCC  = DAG.getSetCC(N0.getDebugLoc(), MVT::i1, N0, N1, CC);
-      Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(),
-                         N2.getValueType(), SCC);
-    }
+      }
 
-    AddToWorkList(SCC.getNode());
-    AddToWorkList(Temp.getNode());
+      AddToWorkList(SCC.getNode());
+      AddToWorkList(Temp.getNode());
 
-    if (N2C->getAPIntValue() == 1)
-      return Temp;
+      if (N2C->getAPIntValue() == 1)
+        return Temp;
 
-    // shl setcc result by log2 n2c
-    return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
-                       DAG.getConstant(N2C->getAPIntValue().logBase2(),
-                                       getShiftAmountTy(Temp.getValueType())));
+      // shl setcc result by log2 n2c
+      return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
+                         DAG.getConstant(N2C->getAPIntValue().logBase2(),
+                                         getShiftAmountTy(Temp.getValueType())));
+    }
   }
 
   // Check to see if this is the equivalent of setcc
@@ -8729,7 +9573,7 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) {
 // to alias with anything but itself.  Provides base object and offset as
 // results.
 static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
-                           const GlobalValue *&GV, void *&CV) {
+                           const GlobalValue *&GV, const void *&CV) {
   // Assume it is a primitive operation.
   Base = Ptr; Offset = 0; GV = 0; CV = 0;
 
@@ -8754,8 +9598,8 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
   // for ConstantSDNodes since the same constant pool entry may be represented
   // by multiple nodes with different offsets.
   if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) {
-    CV = C->isMachineConstantPoolEntry() ? (void *)C->getMachineCPVal()
-                                         : (void *)C->getConstVal();
+    CV = C->isMachineConstantPoolEntry() ? (const void *)C->getMachineCPVal()
+                                         : (const void *)C->getConstVal();
     Offset += C->getOffset();
     return false;
   }
@@ -8780,7 +9624,7 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
   SDValue Base1, Base2;
   int64_t Offset1, Offset2;
   const GlobalValue *GV1, *GV2;
-  void *CV1, *CV2;
+  const void *CV1, *CV2;
   bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1);
   bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2);
 
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 683fac6..4854cf7 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -53,7 +53,7 @@
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Analysis/Loads.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/Target/TargetLowering.h"
@@ -1059,7 +1059,7 @@ FastISel::FastISel(FunctionLoweringInfo &funcInfo,
     MFI(*FuncInfo.MF->getFrameInfo()),
     MCP(*FuncInfo.MF->getConstantPool()),
     TM(FuncInfo.MF->getTarget()),
-    TD(*TM.getTargetData()),
+    TD(*TM.getDataLayout()),
     TII(*TM.getInstrInfo()),
     TLI(*TM.getTargetLowering()),
     TRI(*TM.getRegisterInfo()),
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 3e18ea7..a418290 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -29,7 +29,7 @@
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetOptions.h"
@@ -80,9 +80,9 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) {
     if (const AllocaInst *AI = dyn_cast<AllocaInst>(I))
       if (const ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) {
         Type *Ty = AI->getAllocatedType();
-        uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
+        uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty);
         unsigned Align =
-          std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
+          std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty),
                    AI->getAlignment());
 
         TySize *= CUI->getZExtValue();   // Get total allocated size.
@@ -97,7 +97,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) {
             cast<ArrayType>(Ty)->getElementType()->isIntegerTy(8)));
         StaticAllocaMap[AI] =
           MF->getFrameInfo()->CreateStackObject(TySize, Align, false,
-                                                MayNeedSP);
+                                                MayNeedSP, AI);
       }
 
   for (; BB != EB; ++BB)
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 4488d27..a8381b2 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -20,7 +20,7 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetLowering.h"
@@ -55,7 +55,8 @@ unsigned InstrEmitter::CountResults(SDNode *Node) {
 ///
 /// Also count physreg RegisterSDNode and RegisterMaskSDNode operands preceding
 /// the chain and glue. These operands may be implicit on the machine instr.
-static unsigned countOperands(SDNode *Node, unsigned &NumImpUses) {
+static unsigned countOperands(SDNode *Node, unsigned NumExpUses,
+                              unsigned &NumImpUses) {
   unsigned N = Node->getNumOperands();
   while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
     --N;
@@ -63,7 +64,8 @@ static unsigned countOperands(SDNode *Node, unsigned &NumImpUses) {
     --N; // Ignore chain if it exists.
 
   // Count RegisterSDNode and RegisterMaskSDNode operands for NumImpUses.
-  for (unsigned I = N; I; --I) {
+  NumImpUses = N - NumExpUses;
+  for (unsigned I = N; I > NumExpUses; --I) {
     if (isa<RegisterMaskSDNode>(Node->getOperand(I - 1)))
       continue;
     if (RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Node->getOperand(I - 1)))
@@ -312,8 +314,6 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
     const TargetRegisterClass *DstRC = 0;
     if (IIOpNum < II->getNumOperands())
       DstRC = TRI->getAllocatableClass(TII->getRegClass(*II,IIOpNum,TRI,*MF));
-    assert((DstRC || (MI->isVariadic() && IIOpNum >= MCID.getNumOperands())) &&
-           "Don't have operand info for this instruction!");
     if (DstRC && !MRI->constrainRegClass(VReg, DstRC, MinRCSize)) {
       unsigned NewVReg = MRI->createVirtualRegister(DstRC);
       BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(),
@@ -390,10 +390,10 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
     Type *Type = CP->getType();
     // MachineConstantPool wants an explicit alignment.
     if (Align == 0) {
-      Align = TM->getTargetData()->getPrefTypeAlignment(Type);
+      Align = TM->getDataLayout()->getPrefTypeAlignment(Type);
       if (Align == 0) {
         // Alignment of vector types.  FIXME!
-        Align = TM->getTargetData()->getTypeAllocSize(Type);
+        Align = TM->getDataLayout()->getTypeAllocSize(Type);
       }
     }
 
@@ -410,6 +410,7 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
                                             ES->getTargetFlags()));
   } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op)) {
     MI->addOperand(MachineOperand::CreateBA(BA->getBlockAddress(),
+                                            BA->getOffset(),
                                             BA->getTargetFlags()));
   } else if (TargetIndexSDNode *TI = dyn_cast<TargetIndexSDNode>(Op)) {
     MI->addOperand(MachineOperand::CreateTargetIndex(TI->getIndex(),
@@ -720,7 +721,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
   const MCInstrDesc &II = TII->get(Opc);
   unsigned NumResults = CountResults(Node);
   unsigned NumImpUses = 0;
-  unsigned NodeOperands = countOperands(Node, NumImpUses);
+  unsigned NodeOperands =
+    countOperands(Node, II.getNumOperands() - II.getNumDefs(), NumImpUses);
   bool HasPhysRegOuts = NumResults > II.getNumDefs() && II.getImplicitDefs()!=0;
 #ifndef NDEBUG
   unsigned NumMIOperands = NodeOperands + NumResults;
@@ -870,6 +872,17 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
     break;
   }
 
+  case ISD::LIFETIME_START:
+  case ISD::LIFETIME_END: {
+    unsigned TarOp = (Node->getOpcode() == ISD::LIFETIME_START) ?
+    TargetOpcode::LIFETIME_START : TargetOpcode::LIFETIME_END;
+
+    FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Node->getOperand(1));
+    BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TarOp))
+    .addFrameIndex(FI->getIndex());
+    break;
+  }
+
   case ISD::INLINEASM: {
     unsigned NumOps = Node->getNumOperands();
     if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
@@ -884,25 +897,30 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
     const char *AsmStr = cast<ExternalSymbolSDNode>(AsmStrV)->getSymbol();
     MI->addOperand(MachineOperand::CreateES(AsmStr));
 
-    // Add the HasSideEffect and isAlignStack bits.
+    // Add the HasSideEffect, isAlignStack, AsmDialect, MayLoad and MayStore
+    // bits.
     int64_t ExtraInfo =
       cast<ConstantSDNode>(Node->getOperand(InlineAsm::Op_ExtraInfo))->
                           getZExtValue();
     MI->addOperand(MachineOperand::CreateImm(ExtraInfo));
 
+    // Remember to operand index of the group flags.
+    SmallVector<unsigned, 8> GroupIdx;
+
     // Add all of the operand registers to the instruction.
     for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
       unsigned Flags =
         cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
-      unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+      const unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
 
+      GroupIdx.push_back(MI->getNumOperands());
       MI->addOperand(MachineOperand::CreateImm(Flags));
       ++i;  // Skip the ID value.
 
       switch (InlineAsm::getKind(Flags)) {
       default: llvm_unreachable("Bad flags!");
         case InlineAsm::Kind_RegDef:
-        for (; NumVals; --NumVals, ++i) {
+        for (unsigned j = 0; j != NumVals; ++j, ++i) {
           unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
           // FIXME: Add dead flags for physical and virtual registers defined.
           // For now, mark physical register defs as implicit to help fast
@@ -913,7 +931,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
         break;
       case InlineAsm::Kind_RegDefEarlyClobber:
       case InlineAsm::Kind_Clobber:
-        for (; NumVals; --NumVals, ++i) {
+        for (unsigned j = 0; j != NumVals; ++j, ++i) {
           unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
           MI->addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/ true,
                          /*isImp=*/ TargetRegisterInfo::isPhysicalRegister(Reg),
@@ -928,9 +946,20 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
       case InlineAsm::Kind_Mem:  // Addressing mode.
         // The addressing mode has been selected, just add all of the
         // operands to the machine instruction.
-        for (; NumVals; --NumVals, ++i)
+        for (unsigned j = 0; j != NumVals; ++j, ++i)
           AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap,
                      /*IsDebug=*/false, IsClone, IsCloned);
+
+        // Manually set isTied bits.
+        if (InlineAsm::getKind(Flags) == InlineAsm::Kind_RegUse) {
+          unsigned DefGroup = 0;
+          if (InlineAsm::isUseOperandTiedToDef(Flags, DefGroup)) {
+            unsigned DefIdx = GroupIdx[DefGroup] + 1;
+            unsigned UseIdx = GroupIdx.back() + 1;
+            for (unsigned j = 0; j != NumVals; ++j)
+              MI->tieOperands(DefIdx + j, UseIdx + j);
+          }
+        }
         break;
       }
     }
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 908ebb9..abf40b7 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -22,7 +22,7 @@
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -718,7 +718,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
           // expand it.
           if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
             Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
-            unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty);
+            unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty);
             if (ST->getAlignment() < ABIAlignment)
               ExpandUnalignedStore(cast<StoreSDNode>(Node),
                                    DAG, TLI, this);
@@ -824,7 +824,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
           // expand it.
           if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
             Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
-            unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty);
+            unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty);
             if (ST->getAlignment() < ABIAlignment)
               ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this);
           }
@@ -869,25 +869,24 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
     switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
     default: llvm_unreachable("This action is not supported yet!");
     case TargetLowering::Legal:
-             // If this is an unaligned load and the target doesn't support it,
-             // expand it.
-             if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
-               Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
-               unsigned ABIAlignment =
-                 TLI.getTargetData()->getABITypeAlignment(Ty);
-               if (LD->getAlignment() < ABIAlignment){
-                 ExpandUnalignedLoad(cast<LoadSDNode>(Node),
-                                     DAG, TLI, RVal, RChain);
-               }
-             }
-             break;
+      // If this is an unaligned load and the target doesn't support it,
+      // expand it.
+      if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
+        Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+        unsigned ABIAlignment =
+          TLI.getDataLayout()->getABITypeAlignment(Ty);
+        if (LD->getAlignment() < ABIAlignment){
+          ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, RVal, RChain);
+        }
+      }
+      break;
     case TargetLowering::Custom: {
-             SDValue Res = TLI.LowerOperation(RVal, DAG);
-             if (Res.getNode()) {
-               RVal = Res;
-               RChain = Res.getValue(1);
-             }
-             break;
+      SDValue Res = TLI.LowerOperation(RVal, DAG);
+      if (Res.getNode()) {
+        RVal = Res;
+        RChain = Res.getValue(1);
+      }
+      break;
     }
     case TargetLowering::Promote: {
       // Only promote a load of vector type to another.
@@ -1060,7 +1059,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
                  Type *Ty =
                    LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
                  unsigned ABIAlignment =
-                   TLI.getTargetData()->getABITypeAlignment(Ty);
+                   TLI.getDataLayout()->getABITypeAlignment(Ty);
                  if (LD->getAlignment() < ABIAlignment){
                    ExpandUnalignedLoad(cast<LoadSDNode>(Node),
                                        DAG, TLI, Value, Chain);
@@ -1241,6 +1240,19 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
     if (Action == TargetLowering::Legal)
       Action = TargetLowering::Custom;
     break;
+  case ISD::DEBUGTRAP:
+    Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+    if (Action == TargetLowering::Expand) {
+      // replace ISD::DEBUGTRAP with ISD::TRAP
+      SDValue NewVal;
+      NewVal = DAG.getNode(ISD::TRAP, Node->getDebugLoc(), Node->getVTList(),
+                           Node->getOperand(0));
+      ReplaceNode(Node, NewVal.getNode());
+      LegalizeOp(NewVal.getNode());
+      return;
+    }
+    break;
+
   default:
     if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
       Action = TargetLowering::Legal;
@@ -1588,26 +1600,71 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
     break;
   case TargetLowering::Expand: {
     ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
+    ISD::CondCode InvCC = ISD::SETCC_INVALID;
     unsigned Opc = 0;
     switch (CCCode) {
     default: llvm_unreachable("Don't know how to expand this condition!");
-    case ISD::SETOEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETO;  Opc = ISD::AND; break;
-    case ISD::SETOGT: CC1 = ISD::SETGT; CC2 = ISD::SETO;  Opc = ISD::AND; break;
-    case ISD::SETOGE: CC1 = ISD::SETGE; CC2 = ISD::SETO;  Opc = ISD::AND; break;
-    case ISD::SETOLT: CC1 = ISD::SETLT; CC2 = ISD::SETO;  Opc = ISD::AND; break;
-    case ISD::SETOLE: CC1 = ISD::SETLE; CC2 = ISD::SETO;  Opc = ISD::AND; break;
-    case ISD::SETONE: CC1 = ISD::SETNE; CC2 = ISD::SETO;  Opc = ISD::AND; break;
-    case ISD::SETUEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETUO; Opc = ISD::OR;  break;
-    case ISD::SETUGT: CC1 = ISD::SETGT; CC2 = ISD::SETUO; Opc = ISD::OR;  break;
-    case ISD::SETUGE: CC1 = ISD::SETGE; CC2 = ISD::SETUO; Opc = ISD::OR;  break;
-    case ISD::SETULT: CC1 = ISD::SETLT; CC2 = ISD::SETUO; Opc = ISD::OR;  break;
-    case ISD::SETULE: CC1 = ISD::SETLE; CC2 = ISD::SETUO; Opc = ISD::OR;  break;
-    case ISD::SETUNE: CC1 = ISD::SETNE; CC2 = ISD::SETUO; Opc = ISD::OR;  break;
-    // FIXME: Implement more expansions.
-    }
-
-    SDValue SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1);
-    SDValue SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2);
+    case ISD::SETO: 
+        assert(TLI.getCondCodeAction(ISD::SETOEQ, OpVT)
+            == TargetLowering::Legal
+            && "If SETO is expanded, SETOEQ must be legal!");
+        CC1 = ISD::SETOEQ; CC2 = ISD::SETOEQ; Opc = ISD::AND; break;
+    case ISD::SETUO:  
+        assert(TLI.getCondCodeAction(ISD::SETUNE, OpVT)
+            == TargetLowering::Legal
+            && "If SETUO is expanded, SETUNE must be legal!");
+        CC1 = ISD::SETUNE; CC2 = ISD::SETUNE; Opc = ISD::OR;  break;
+    case ISD::SETOEQ:
+    case ISD::SETOGT:
+    case ISD::SETOGE:
+    case ISD::SETOLT:
+    case ISD::SETOLE:
+    case ISD::SETONE: 
+    case ISD::SETUEQ: 
+    case ISD::SETUNE: 
+    case ISD::SETUGT: 
+    case ISD::SETUGE: 
+    case ISD::SETULT: 
+    case ISD::SETULE:
+        // If we are floating point, assign and break, otherwise fall through.
+        if (!OpVT.isInteger()) {
+          // We can use the 4th bit to tell if we are the unordered
+          // or ordered version of the opcode.
+          CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
+          Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
+          CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
+          break;
+        }
+        // Fallthrough if we are unsigned integer.
+    case ISD::SETLE:
+    case ISD::SETGT:
+    case ISD::SETGE:
+    case ISD::SETLT:
+    case ISD::SETNE:
+    case ISD::SETEQ:
+      InvCC = ISD::getSetCCSwappedOperands(CCCode);
+      if (TLI.getCondCodeAction(InvCC, OpVT) == TargetLowering::Expand) {
+        // We only support using the inverted operation and not a
+        // different manner of supporting expanding these cases.
+        llvm_unreachable("Don't know how to expand this condition!");
+      }
+      LHS = DAG.getSetCC(dl, VT, RHS, LHS, InvCC);
+      RHS = SDValue();
+      CC = SDValue();
+      return;
+    }
+    
+    SDValue SetCC1, SetCC2;
+    if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
+      // If we aren't the ordered or unorder operation,
+      // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
+      SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1);
+      SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2);
+    } else {
+      // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
+      SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1);
+      SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2);
+    }
     LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
     RHS = SDValue();
     CC  = SDValue();
@@ -1626,7 +1683,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
                                                DebugLoc dl) {
   // Create the stack frame object.
   unsigned SrcAlign =
-    TLI.getTargetData()->getPrefTypeAlignment(SrcOp.getValueType().
+    TLI.getDataLayout()->getPrefTypeAlignment(SrcOp.getValueType().
                                               getTypeForEVT(*DAG.getContext()));
   SDValue FIPtr = DAG.CreateStackTemporary(SlotVT, SrcAlign);
 
@@ -1638,7 +1695,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
   unsigned SlotSize = SlotVT.getSizeInBits();
   unsigned DestSize = DestVT.getSizeInBits();
   Type *DestType = DestVT.getTypeForEVT(*DAG.getContext());
-  unsigned DestAlign = TLI.getTargetData()->getPrefTypeAlignment(DestType);
+  unsigned DestAlign = TLI.getDataLayout()->getPrefTypeAlignment(DestType);
 
   // Emit a store to the stack slot.  Use a truncstore if the input value is
   // later than DestVT.
@@ -2042,7 +2099,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
                                                    SDValue Op0,
                                                    EVT DestVT,
                                                    DebugLoc dl) {
-  if (Op0.getValueType() == MVT::i32) {
+  if (Op0.getValueType() == MVT::i32 && TLI.isTypeLegal(MVT::f64)) {
     // simple 32-bit [signed|unsigned] integer to float/double expansion
 
     // Get the stack frame index of a 8 byte buffer.
@@ -2787,7 +2844,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
 
     // Increment the pointer, VAList, to the next vaarg
     Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList,
-                       DAG.getConstant(TLI.getTargetData()->
+                       DAG.getConstant(TLI.getDataLayout()->
                           getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())),
                                        TLI.getPointerTy()));
     // Store the incremented VAList to the legalized pointer
@@ -3109,6 +3166,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     Tmp3 = Node->getOperand(1);
     if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) ||
         (isDivRemLibcallAvailable(Node, isSigned, TLI) &&
+         // If div is legal, it's better to do the normal expansion
+         !TLI.isOperationLegalOrCustom(DivOpc, Node->getValueType(0)) &&
          useDivRem(Node, isSigned, false))) {
       Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1);
     } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) {
@@ -3366,7 +3425,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
 
     EVT PTy = TLI.getPointerTy();
 
-    const TargetData &TD = *TLI.getTargetData();
+    const DataLayout &TD = *TLI.getDataLayout();
     unsigned EntrySize =
       DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD);
 
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index e393896..92dc5a9 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -1245,32 +1245,30 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
   DEBUG(dbgs() << "Expand float operand: "; N->dump(&DAG); dbgs() << "\n");
   SDValue Res = SDValue();
 
-  if (TLI.getOperationAction(N->getOpcode(), N->getOperand(OpNo).getValueType())
-      == TargetLowering::Custom)
-    Res = TLI.LowerOperation(SDValue(N, 0), DAG);
-
-  if (Res.getNode() == 0) {
-    switch (N->getOpcode()) {
-    default:
-  #ifndef NDEBUG
-      dbgs() << "ExpandFloatOperand Op #" << OpNo << ": ";
-      N->dump(&DAG); dbgs() << "\n";
-  #endif
-      llvm_unreachable("Do not know how to expand this operator's operand!");
-
-    case ISD::BITCAST:         Res = ExpandOp_BITCAST(N); break;
-    case ISD::BUILD_VECTOR:    Res = ExpandOp_BUILD_VECTOR(N); break;
-    case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
-
-    case ISD::BR_CC:      Res = ExpandFloatOp_BR_CC(N); break;
-    case ISD::FP_ROUND:   Res = ExpandFloatOp_FP_ROUND(N); break;
-    case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break;
-    case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break;
-    case ISD::SELECT_CC:  Res = ExpandFloatOp_SELECT_CC(N); break;
-    case ISD::SETCC:      Res = ExpandFloatOp_SETCC(N); break;
-    case ISD::STORE:      Res = ExpandFloatOp_STORE(cast<StoreSDNode>(N),
-                                                    OpNo); break;
-    }
+  // See if the target wants to custom expand this node.
+  if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+    return false;
+
+  switch (N->getOpcode()) {
+  default:
+#ifndef NDEBUG
+    dbgs() << "ExpandFloatOperand Op #" << OpNo << ": ";
+    N->dump(&DAG); dbgs() << "\n";
+#endif
+    llvm_unreachable("Do not know how to expand this operator's operand!");
+
+  case ISD::BITCAST:         Res = ExpandOp_BITCAST(N); break;
+  case ISD::BUILD_VECTOR:    Res = ExpandOp_BUILD_VECTOR(N); break;
+  case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
+
+  case ISD::BR_CC:      Res = ExpandFloatOp_BR_CC(N); break;
+  case ISD::FP_ROUND:   Res = ExpandFloatOp_FP_ROUND(N); break;
+  case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break;
+  case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break;
+  case ISD::SELECT_CC:  Res = ExpandFloatOp_SELECT_CC(N); break;
+  case ISD::SETCC:      Res = ExpandFloatOp_SETCC(N); break;
+  case ISD::STORE:      Res = ExpandFloatOp_STORE(cast<StoreSDNode>(N),
+                                                  OpNo); break;
   }
 
   // If the result is null, the sub-method took care of registering results etc.
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index e8e968a..a370fae 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -644,8 +644,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
   EVT SmallVT = LHS.getValueType();
 
   // To determine if the result overflowed in a larger type, we extend the
-  // input to the larger type, do the multiply, then check the high bits of
-  // the result to see if the overflow happened.
+  // input to the larger type, do the multiply (checking if it overflows),
+  // then also check the high bits of the result to see if overflow happened
+  // there.
   if (N->getOpcode() == ISD::SMULO) {
     LHS = SExtPromotedInteger(LHS);
     RHS = SExtPromotedInteger(RHS);
@@ -653,24 +654,31 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
     LHS = ZExtPromotedInteger(LHS);
     RHS = ZExtPromotedInteger(RHS);
   }
-  SDValue Mul = DAG.getNode(ISD::MUL, DL, LHS.getValueType(), LHS, RHS);
+  SDVTList VTs = DAG.getVTList(LHS.getValueType(), N->getValueType(1));
+  SDValue Mul = DAG.getNode(N->getOpcode(), DL, VTs, LHS, RHS);
 
-  // Overflow occurred iff the high part of the result does not
-  // zero/sign-extend the low part.
+  // Overflow occurred if it occurred in the larger type, or if the high part
+  // of the result does not zero/sign-extend the low part.  Check this second
+  // possibility first.
   SDValue Overflow;
   if (N->getOpcode() == ISD::UMULO) {
-    // Unsigned overflow occurred iff the high part is non-zero.
+    // Unsigned overflow occurred if the high part is non-zero.
     SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul,
                              DAG.getIntPtrConstant(SmallVT.getSizeInBits()));
     Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi,
                             DAG.getConstant(0, Hi.getValueType()), ISD::SETNE);
   } else {
-    // Signed overflow occurred iff the high part does not sign extend the low.
+    // Signed overflow occurred if the high part does not sign extend the low.
     SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Mul.getValueType(),
                                Mul, DAG.getValueType(SmallVT));
     Overflow = DAG.getSetCC(DL, N->getValueType(1), SExt, Mul, ISD::SETNE);
   }
 
+  // The only other way for overflow to occur is if the multiplication in the
+  // larger type itself overflowed.
+  Overflow = DAG.getNode(ISD::OR, DL, N->getValueType(1), Overflow,
+                         SDValue(Mul.getNode(), 1));
+
   // Use the calculated overflow everywhere.
   ReplaceValueWith(SDValue(N, 1), Overflow);
   return Mul;
@@ -2253,32 +2261,35 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N,
 void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
                                           SDValue &Lo, SDValue &Hi) {
   EVT VT = N->getValueType(0);
-  Type *RetTy = VT.getTypeForEVT(*DAG.getContext());
-  EVT PtrVT = TLI.getPointerTy();
-  Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext());
   DebugLoc dl = N->getDebugLoc();
 
   // A divide for UMULO should be faster than a function call.
   if (N->getOpcode() == ISD::UMULO) {
     SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
-    DebugLoc DL = N->getDebugLoc();
 
-    SDValue MUL = DAG.getNode(ISD::MUL, DL, LHS.getValueType(), LHS, RHS);
+    SDValue MUL = DAG.getNode(ISD::MUL, dl, LHS.getValueType(), LHS, RHS);
     SplitInteger(MUL, Lo, Hi);
 
     // A divide for UMULO will be faster than a function call. Select to
     // make sure we aren't using 0.
     SDValue isZero = DAG.getSetCC(dl, TLI.getSetCCResultType(VT),
-                                  RHS, DAG.getConstant(0, VT), ISD::SETNE);
+                                  RHS, DAG.getConstant(0, VT), ISD::SETEQ);
     SDValue NotZero = DAG.getNode(ISD::SELECT, dl, VT, isZero,
                                   DAG.getConstant(1, VT), RHS);
-    SDValue DIV = DAG.getNode(ISD::UDIV, DL, LHS.getValueType(), MUL, NotZero);
-    SDValue Overflow;
-    Overflow = DAG.getSetCC(DL, N->getValueType(1), DIV, LHS, ISD::SETNE);
+    SDValue DIV = DAG.getNode(ISD::UDIV, dl, VT, MUL, NotZero);
+    SDValue Overflow = DAG.getSetCC(dl, N->getValueType(1), DIV, LHS,
+                                    ISD::SETNE);
+    Overflow = DAG.getNode(ISD::SELECT, dl, N->getValueType(1), isZero,
+                           DAG.getConstant(0, N->getValueType(1)),
+                           Overflow);
     ReplaceValueWith(SDValue(N, 1), Overflow);
     return;
   }
 
+  Type *RetTy = VT.getTypeForEVT(*DAG.getContext());
+  EVT PtrVT = TLI.getPointerTy();
+  Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext());
+  
   // Replace this with a libcall that will check overflow.
   RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
   if (VT == MVT::i32)
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 39337ff..644e36e 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -15,7 +15,7 @@
 
 #include "LegalizeTypes.h"
 #include "llvm/CallingConv.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 94fc976..20b7ce6 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -625,6 +625,7 @@ private:
   SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N);
   SDValue WidenVecRes_VSETCC(SDNode* N);
 
+  SDValue WidenVecRes_Ternary(SDNode *N);
   SDValue WidenVecRes_Binary(SDNode *N);
   SDValue WidenVecRes_Convert(SDNode *N);
   SDValue WidenVecRes_POWI(SDNode *N);
@@ -633,7 +634,7 @@ private:
   SDValue WidenVecRes_InregOp(SDNode *N);
 
   // Widen Vector Operand.
-  bool WidenVectorOperand(SDNode *N, unsigned ResNo);
+  bool WidenVectorOperand(SDNode *N, unsigned OpNo);
   SDValue WidenVecOp_BITCAST(SDNode *N);
   SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N);
   SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 06f6bd6..6bcb3b2 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -20,7 +20,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "LegalizeTypes.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
 using namespace llvm;
 
 //===----------------------------------------------------------------------===//
@@ -94,14 +94,48 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
   if (InVT.isVector() && OutVT.isInteger()) {
     // Handle cases like i64 = BITCAST v1i64 on x86, where the operand
     // is legal but the result is not.
-    EVT NVT = EVT::getVectorVT(*DAG.getContext(), NOutVT, 2);
+    unsigned NumElems = 2;
+    EVT ElemVT = NOutVT;
+    EVT NVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElems);
+
+    // If <ElemVT * N> is not a legal type, try <ElemVT/2 * (N*2)>.
+    while (!isTypeLegal(NVT)) {
+      unsigned NewSizeInBits = ElemVT.getSizeInBits() / 2;
+      // If the element size is smaller than byte, bail.
+      if (NewSizeInBits < 8)
+        break;
+      NumElems *= 2;
+      ElemVT = EVT::getIntegerVT(*DAG.getContext(), NewSizeInBits);
+      NVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElems);
+    }
 
     if (isTypeLegal(NVT)) {
       SDValue CastInOp = DAG.getNode(ISD::BITCAST, dl, NVT, InOp);
-      Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp,
-                       DAG.getIntPtrConstant(0));
-      Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp,
-                       DAG.getIntPtrConstant(1));
+
+      SmallVector<SDValue, 8> Vals;
+      for (unsigned i = 0; i < NumElems; ++i)
+        Vals.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ElemVT,
+                                   CastInOp, DAG.getIntPtrConstant(i)));
+
+      // Build Lo, Hi pair by pairing extracted elements if needed.
+      unsigned Slot = 0;
+      for (unsigned e = Vals.size(); e - Slot > 2; Slot += 2, e += 1) {
+        // Each iteration will BUILD_PAIR two nodes and append the result until
+        // there are only two nodes left, i.e. Lo and Hi.
+        SDValue LHS = Vals[Slot];
+        SDValue RHS = Vals[Slot + 1];
+
+        if (TLI.isBigEndian())
+          std::swap(LHS, RHS);
+
+        Vals.push_back(DAG.getNode(ISD::BUILD_PAIR, dl,
+                                   EVT::getIntegerVT(
+                                     *DAG.getContext(),
+                                     LHS.getValueType().getSizeInBits() << 1),
+                                   LHS, RHS));
+      }
+      Lo = Vals[Slot++];
+      Hi = Vals[Slot++];
 
       if (TLI.isBigEndian())
         std::swap(Lo, Hi);
@@ -116,7 +150,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
   // Create the stack frame object.  Make sure it is aligned for both
   // the source and expanded destination types.
   unsigned Alignment =
-    TLI.getTargetData()->getPrefTypeAlignment(NOutVT.
+    TLI.getDataLayout()->getPrefTypeAlignment(NOutVT.
                                               getTypeForEVT(*DAG.getContext()));
   SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment);
   int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 704f99b..22f8d51 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -64,6 +64,7 @@ class VectorLegalizer {
   // Implement vselect in terms of XOR, AND, OR when blend is not supported
   // by the target.
   SDValue ExpandVSELECT(SDValue Op);
+  SDValue ExpandSELECT(SDValue Op);
   SDValue ExpandLoad(SDValue Op);
   SDValue ExpandStore(SDValue Op);
   SDValue ExpandFNEG(SDValue Op);
@@ -220,6 +221,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
   case ISD::FRINT:
   case ISD::FNEARBYINT:
   case ISD::FFLOOR:
+  case ISD::FMA:
   case ISD::SIGN_EXTEND_INREG:
     QueryType = Node->getValueType(0);
     break;
@@ -260,6 +262,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
   case TargetLowering::Expand:
     if (Node->getOpcode() == ISD::VSELECT)
       Result = ExpandVSELECT(Op);
+    else if (Node->getOpcode() == ISD::SELECT)
+      Result = ExpandSELECT(Op);
     else if (Node->getOpcode() == ISD::UINT_TO_FP)
       Result = ExpandUINT_TO_FLOAT(Op);
     else if (Node->getOpcode() == ISD::FNEG)
@@ -435,6 +439,66 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) {
   return TF;
 }
 
+SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
+  // Lower a select instruction where the condition is a scalar and the
+  // operands are vectors. Lower this select to VSELECT and implement it
+  // using XOR AND OR. The selector bit is broadcasted. 
+  EVT VT = Op.getValueType();
+  DebugLoc DL = Op.getDebugLoc();
+
+  SDValue Mask = Op.getOperand(0);
+  SDValue Op1 = Op.getOperand(1);
+  SDValue Op2 = Op.getOperand(2);
+
+  assert(VT.isVector() && !Mask.getValueType().isVector()
+         && Op1.getValueType() == Op2.getValueType() && "Invalid type");
+
+  unsigned NumElem = VT.getVectorNumElements();
+
+  // If we can't even use the basic vector operations of
+  // AND,OR,XOR, we will have to scalarize the op.
+  // Notice that the operation may be 'promoted' which means that it is
+  // 'bitcasted' to another type which is handled.
+  // Also, we need to be able to construct a splat vector using BUILD_VECTOR.
+  if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
+      TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
+      TLI.getOperationAction(ISD::OR,  VT) == TargetLowering::Expand ||
+      TLI.getOperationAction(ISD::BUILD_VECTOR,  VT) == TargetLowering::Expand)
+    return DAG.UnrollVectorOp(Op.getNode());
+
+  // Generate a mask operand.
+  EVT MaskTy = TLI.getSetCCResultType(VT);
+  assert(MaskTy.isVector() && "Invalid CC type");
+  assert(MaskTy.getSizeInBits() == Op1.getValueType().getSizeInBits()
+         && "Invalid mask size");
+
+  // What is the size of each element in the vector mask.
+  EVT BitTy = MaskTy.getScalarType();
+
+  Mask = DAG.getNode(ISD::SELECT, DL, BitTy, Mask,
+          DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), BitTy),
+          DAG.getConstant(0, BitTy));
+
+  // Broadcast the mask so that the entire vector is all-one or all zero.
+  SmallVector<SDValue, 8> Ops(NumElem, Mask);
+  Mask = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskTy, &Ops[0], Ops.size());
+
+  // Bitcast the operands to be the same type as the mask.
+  // This is needed when we select between FP types because
+  // the mask is a vector of integers.
+  Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1);
+  Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2);
+
+  SDValue AllOnes = DAG.getConstant(
+            APInt::getAllOnesValue(BitTy.getSizeInBits()), MaskTy);
+  SDValue NotMask = DAG.getNode(ISD::XOR, DL, MaskTy, Mask, AllOnes);
+
+  Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask);
+  Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask);
+  SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2);
+  return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
+}
+
 SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
   // Implement VSELECT in terms of XOR, AND, OR
   // on platforms which do not support blend natively.
@@ -449,12 +513,17 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
   // AND,OR,XOR, we will have to scalarize the op.
   // Notice that the operation may be 'promoted' which means that it is
   // 'bitcasted' to another type which is handled.
+  // This operation also isn't safe with AND, OR, XOR when the boolean
+  // type is 0/1 as we need an all ones vector constant to mask with.
+  // FIXME: Sign extend 1 to all ones if thats legal on the target.
   if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
       TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
-      TLI.getOperationAction(ISD::OR,  VT) == TargetLowering::Expand)
+      TLI.getOperationAction(ISD::OR,  VT) == TargetLowering::Expand ||
+      TLI.getBooleanContents(true) !=
+      TargetLowering::ZeroOrNegativeOneBooleanContent)
     return DAG.UnrollVectorOp(Op.getNode());
 
-  assert(VT.getSizeInBits() == Op.getOperand(1).getValueType().getSizeInBits()
+  assert(VT.getSizeInBits() == Op1.getValueType().getSizeInBits()
          && "Invalid mask size");
   // Bitcast the operands to be the same type as the mask.
   // This is needed when we select between FP types because
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 4709202..d51a6eb 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -21,7 +21,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "LegalizeTypes.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
@@ -749,7 +749,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
   SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
   Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
   unsigned Alignment =
-    TLI.getTargetData()->getPrefTypeAlignment(VecType);
+    TLI.getDataLayout()->getPrefTypeAlignment(VecType);
   Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo(), EltVT,
                             false, false, 0);
 
@@ -1366,6 +1366,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::FTRUNC:
     Res = WidenVecRes_Unary(N);
     break;
+  case ISD::FMA:
+    Res = WidenVecRes_Ternary(N);
+    break;
   }
 
   // If Res is null, the sub-method took care of registering the result.
@@ -1373,6 +1376,16 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
     SetWidenedVector(SDValue(N, ResNo), Res);
 }
 
+SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) {
+  // Ternary op widening.
+  DebugLoc dl = N->getDebugLoc();
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+  SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+  SDValue InOp3 = GetWidenedVector(N->getOperand(2));
+  return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3);
+}
+
 SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
   // Binary op widening.
   unsigned Opcode = N->getOpcode();
@@ -2069,16 +2082,20 @@ SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {
 //===----------------------------------------------------------------------===//
 // Widen Vector Operand
 //===----------------------------------------------------------------------===//
-bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) {
-  DEBUG(dbgs() << "Widen node operand " << ResNo << ": ";
+bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
+  DEBUG(dbgs() << "Widen node operand " << OpNo << ": ";
         N->dump(&DAG);
         dbgs() << "\n");
   SDValue Res = SDValue();
 
+  // See if the target wants to custom widen this node.
+  if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+    return false;
+
   switch (N->getOpcode()) {
   default:
 #ifndef NDEBUG
-    dbgs() << "WidenVectorOperand op #" << ResNo << ": ";
+    dbgs() << "WidenVectorOperand op #" << OpNo << ": ";
     N->dump(&DAG);
     dbgs() << "\n";
 #endif
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeOrdering.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeOrdering.h
index f88b26d..d2269f8 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeOrdering.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeOrdering.h
@@ -28,8 +28,8 @@ class SDNode;
 class SDNodeOrdering {
   DenseMap<const SDNode*, unsigned> OrderMap;
 
-  void operator=(const SDNodeOrdering&);   // Do not implement.
-  SDNodeOrdering(const SDNodeOrdering&);   // Do not implement.
+  void operator=(const SDNodeOrdering&) LLVM_DELETED_FUNCTION;
+  SDNodeOrdering(const SDNodeOrdering&) LLVM_DELETED_FUNCTION;
 public:
   SDNodeOrdering() {}
 
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index b7ce48a..2ecdd89 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -13,11 +13,12 @@
 
 #define DEBUG_TYPE "pre-RA-sched"
 #include "ScheduleDAGSDNodes.h"
+#include "InstrEmitter.h"
 #include "llvm/InlineAsm.h"
 #include "llvm/CodeGen/SchedulerRegistry.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/ADT/SmallSet.h"
@@ -34,6 +35,10 @@ STATISTIC(NumPRCopies,   "Number of physical copies");
 static RegisterScheduler
   fastDAGScheduler("fast", "Fast suboptimal list scheduling",
                    createFastDAGScheduler);
+static RegisterScheduler
+  linearizeDAGScheduler("linearize", "Linearize DAG, no scheduling",
+                        createDAGLinearizer);
+
 
 namespace {
   /// FastPriorityQueue - A degenerate priority queue that considers
@@ -331,7 +336,9 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
       }
     }
     if (isNewLoad) {
-      AddPred(NewSU, SDep(LoadSU, SDep::Order, LoadSU->Latency));
+      SDep D(LoadSU, SDep::Barrier);
+      D.setLatency(LoadSU->Latency);
+      AddPred(NewSU, D);
     }
 
     ++NumUnfolds;
@@ -407,9 +414,12 @@ void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
   for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
     RemovePred(DelDeps[i].first, DelDeps[i].second);
   }
-
-  AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
-  AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
+  SDep FromDep(SU, SDep::Data, Reg);
+  FromDep.setLatency(SU->Latency);
+  AddPred(CopyFromSU, FromDep);
+  SDep ToDep(CopyFromSU, SDep::Data, 0);
+  ToDep.setLatency(CopyFromSU->Latency);
+  AddPred(CopyToSU, ToDep);
 
   Copies.push_back(CopyFromSU);
   Copies.push_back(CopyToSU);
@@ -586,18 +596,14 @@ void ScheduleDAGFast::ListScheduleBottomUp() {
           InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
           DEBUG(dbgs() << "Adding an edge from SU # " << TrySU->NodeNum
                        << " to SU #" << Copies.front()->NodeNum << "\n");
-          AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
-                              /*Reg=*/0, /*isNormalMemory=*/false,
-                              /*isMustAlias=*/false, /*isArtificial=*/true));
+          AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));
           NewDef = Copies.back();
         }
 
         DEBUG(dbgs() << "Adding an edge from SU # " << NewDef->NodeNum
                      << " to SU #" << TrySU->NodeNum << "\n");
         LiveRegDefs[Reg] = NewDef;
-        AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
-                             /*Reg=*/0, /*isNormalMemory=*/false,
-                             /*isMustAlias=*/false, /*isArtificial=*/true));
+        AddPred(NewDef, SDep(TrySU, SDep::Artificial));
         TrySU->isAvailable = false;
         CurSU = NewDef;
       }
@@ -629,6 +635,155 @@ void ScheduleDAGFast::ListScheduleBottomUp() {
 #endif
 }
 
+
+namespace {
+//===----------------------------------------------------------------------===//
+// ScheduleDAGLinearize - No scheduling scheduler, it simply linearize the
+// DAG in topological order.
+// IMPORTANT: this may not work for targets with phyreg dependency.
+//
+class ScheduleDAGLinearize : public ScheduleDAGSDNodes {
+public:
+  ScheduleDAGLinearize(MachineFunction &mf) : ScheduleDAGSDNodes(mf) {}
+
+  void Schedule();
+
+  MachineBasicBlock *EmitSchedule(MachineBasicBlock::iterator &InsertPos);
+
+private:
+  std::vector<SDNode*> Sequence;
+  DenseMap<SDNode*, SDNode*> GluedMap;  // Cache glue to its user
+
+  void ScheduleNode(SDNode *N);
+};
+} // end anonymous namespace
+
+void ScheduleDAGLinearize::ScheduleNode(SDNode *N) {
+  if (N->getNodeId() != 0)
+    llvm_unreachable(0);
+
+  if (!N->isMachineOpcode() &&
+      (N->getOpcode() == ISD::EntryToken || isPassiveNode(N)))
+    // These nodes do not need to be translated into MIs.
+    return;
+
+  DEBUG(dbgs() << "\n*** Scheduling: ");
+  DEBUG(N->dump(DAG));
+  Sequence.push_back(N);
+
+  unsigned NumOps = N->getNumOperands();
+  if (unsigned NumLeft = NumOps) {
+    SDNode *GluedOpN = 0;
+    do {
+      const SDValue &Op = N->getOperand(NumLeft-1);
+      SDNode *OpN = Op.getNode();
+
+      if (NumLeft == NumOps && Op.getValueType() == MVT::Glue) {
+        // Schedule glue operand right above N.
+        GluedOpN = OpN;
+        assert(OpN->getNodeId() != 0 && "Glue operand not ready?");
+        OpN->setNodeId(0);
+        ScheduleNode(OpN);
+        continue;
+      }
+
+      if (OpN == GluedOpN)
+        // Glue operand is already scheduled.
+        continue;
+
+      DenseMap<SDNode*, SDNode*>::iterator DI = GluedMap.find(OpN);
+      if (DI != GluedMap.end() && DI->second != N)
+        // Users of glues are counted against the glued users.
+        OpN = DI->second;
+
+      unsigned Degree = OpN->getNodeId();
+      assert(Degree > 0 && "Predecessor over-released!");
+      OpN->setNodeId(--Degree);
+      if (Degree == 0)
+        ScheduleNode(OpN);
+    } while (--NumLeft);
+  }
+}
+
+/// findGluedUser - Find the representative use of a glue value by walking
+/// the use chain.
+static SDNode *findGluedUser(SDNode *N) {
+  while (SDNode *Glued = N->getGluedUser())
+    N = Glued;
+  return N;
+}
+
+void ScheduleDAGLinearize::Schedule() {
+  DEBUG(dbgs() << "********** DAG Linearization **********\n");
+
+  SmallVector<SDNode*, 8> Glues;
+  unsigned DAGSize = 0;
+  for (SelectionDAG::allnodes_iterator I = DAG->allnodes_begin(),
+         E = DAG->allnodes_end(); I != E; ++I) {
+    SDNode *N = I;
+
+    // Use node id to record degree.
+    unsigned Degree = N->use_size();
+    N->setNodeId(Degree);
+    unsigned NumVals = N->getNumValues();
+    if (NumVals && N->getValueType(NumVals-1) == MVT::Glue &&
+        N->hasAnyUseOfValue(NumVals-1)) {
+      SDNode *User = findGluedUser(N);
+      if (User) {
+        Glues.push_back(N);
+        GluedMap.insert(std::make_pair(N, User));
+      }
+    }
+
+    if (N->isMachineOpcode() ||
+        (N->getOpcode() != ISD::EntryToken && !isPassiveNode(N)))
+      ++DAGSize;
+  }
+
+  for (unsigned i = 0, e = Glues.size(); i != e; ++i) {
+    SDNode *Glue = Glues[i];
+    SDNode *GUser = GluedMap[Glue];
+    unsigned Degree = Glue->getNodeId();
+    unsigned UDegree = GUser->getNodeId();
+
+    // Glue user must be scheduled together with the glue operand. So other
+    // users of the glue operand must be treated as its users.
+    SDNode *ImmGUser = Glue->getGluedUser();
+    for (SDNode::use_iterator ui = Glue->use_begin(), ue = Glue->use_end();
+         ui != ue; ++ui)
+      if (*ui == ImmGUser)
+        --Degree;
+    GUser->setNodeId(UDegree + Degree);
+    Glue->setNodeId(1);
+  }
+
+  Sequence.reserve(DAGSize);
+  ScheduleNode(DAG->getRoot().getNode());
+}
+
+MachineBasicBlock*
+ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
+  InstrEmitter Emitter(BB, InsertPos);
+  DenseMap<SDValue, unsigned> VRBaseMap;
+
+  DEBUG({
+      dbgs() << "\n*** Final schedule ***\n";
+    });
+
+  // FIXME: Handle dbg_values.
+  unsigned NumNodes = Sequence.size();
+  for (unsigned i = 0; i != NumNodes; ++i) {
+    SDNode *N = Sequence[NumNodes-i-1];
+    DEBUG(N->dump(DAG));
+    Emitter.EmitNode(N, false, false, VRBaseMap);
+  }
+
+  DEBUG(dbgs() << '\n');
+
+  InsertPos = Emitter.getInsertPos();
+  return Emitter.getBlock();
+}
+
 //===----------------------------------------------------------------------===//
 //                         Public Constructor Functions
 //===----------------------------------------------------------------------===//
@@ -637,3 +792,8 @@ llvm::ScheduleDAGSDNodes *
 llvm::createFastDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
   return new ScheduleDAGFast(*IS->MF);
 }
+
+llvm::ScheduleDAGSDNodes *
+llvm::createDAGLinearizer(SelectionDAGISel *IS, CodeGenOpt::Level) {
+  return new ScheduleDAGLinearize(*IS->MF);
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index bf0a437..c554569 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -22,7 +22,7 @@
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetLowering.h"
@@ -656,6 +656,8 @@ void ScheduleDAGRRList::EmitNode(SUnit *SU) {
     break;
   case ISD::MERGE_VALUES:
   case ISD::TokenFactor:
+  case ISD::LIFETIME_START:
+  case ISD::LIFETIME_END:
   case ISD::CopyToReg:
   case ISD::CopyFromReg:
   case ISD::EH_LABEL:
@@ -1056,7 +1058,9 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
 
     // Add a data dependency to reflect that NewSU reads the value defined
     // by LoadSU.
-    AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));
+    SDep D(LoadSU, SDep::Data, 0);
+    D.setLatency(LoadSU->Latency);
+    AddPred(NewSU, D);
 
     if (isNewLoad)
       AvailableQueue->addNode(LoadSU);
@@ -1138,17 +1142,18 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
       // Avoid scheduling the def-side copy before other successors. Otherwise
       // we could introduce another physreg interference on the copy and
       // continue inserting copies indefinitely.
-      SDep D(CopyFromSU, SDep::Order, /*Latency=*/0,
-             /*Reg=*/0, /*isNormalMemory=*/false,
-             /*isMustAlias=*/false, /*isArtificial=*/true);
-      AddPred(SuccSU, D);
+      AddPred(SuccSU, SDep(CopyFromSU, SDep::Artificial));
     }
   }
   for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
     RemovePred(DelDeps[i].first, DelDeps[i].second);
 
-  AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
-  AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
+  SDep FromDep(SU, SDep::Data, Reg);
+  FromDep.setLatency(SU->Latency);
+  AddPred(CopyFromSU, FromDep);
+  SDep ToDep(CopyFromSU, SDep::Data, 0);
+  ToDep.setLatency(CopyFromSU->Latency);
+  AddPred(CopyToSU, ToDep);
 
   AvailableQueue->updateNode(SU);
   AvailableQueue->addNode(CopyFromSU);
@@ -1357,9 +1362,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
         if (!BtSU->isPending)
           AvailableQueue->remove(BtSU);
       }
-      AddPred(TrySU, SDep(BtSU, SDep::Order, /*Latency=*/1,
-                          /*Reg=*/0, /*isNormalMemory=*/false,
-                          /*isMustAlias=*/false, /*isArtificial=*/true));
+      AddPred(TrySU, SDep(BtSU, SDep::Artificial));
 
       // If one or more successors has been unscheduled, then the current
       // node is no longer avaialable. Schedule a successor that's now
@@ -1411,20 +1414,14 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
       InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
       DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum
             << " to SU #" << Copies.front()->NodeNum << "\n");
-      AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
-                          /*Reg=*/0, /*isNormalMemory=*/false,
-                          /*isMustAlias=*/false,
-                          /*isArtificial=*/true));
+      AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));
       NewDef = Copies.back();
     }
 
     DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum
           << " to SU #" << TrySU->NodeNum << "\n");
     LiveRegDefs[Reg] = NewDef;
-    AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
-                         /*Reg=*/0, /*isNormalMemory=*/false,
-                         /*isMustAlias=*/false,
-                         /*isArtificial=*/true));
+    AddPred(NewDef, SDep(TrySU, SDep::Artificial));
     TrySU->isAvailable = false;
     CurSU = NewDef;
   }
@@ -1756,6 +1753,7 @@ public:
     return V;
   }
 
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   void dump(ScheduleDAG *DAG) const {
     // Emulate pop() without clobbering NodeQueueIds.
     std::vector<SUnit*> DumpQueue = Queue;
@@ -1766,6 +1764,7 @@ public:
       SU->dump(DAG);
     }
   }
+#endif
 };
 
 typedef RegReductionPriorityQueue<bu_ls_rr_sort>
@@ -1893,6 +1892,7 @@ unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
 //===----------------------------------------------------------------------===//
 
 void RegReductionPQBase::dumpRegPressure() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
          E = TRI->regclass_end(); I != E; ++I) {
     const TargetRegisterClass *RC = *I;
@@ -1902,6 +1902,7 @@ void RegReductionPQBase::dumpRegPressure() const {
     DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
           << '\n');
   }
+#endif
 }
 
 bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
@@ -2930,10 +2931,7 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() {
             !scheduleDAG->IsReachable(SuccSU, SU)) {
           DEBUG(dbgs() << "    Adding a pseudo-two-addr edge from SU #"
                        << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
-          scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
-                                        /*Reg=*/0, /*isNormalMemory=*/false,
-                                        /*isMustAlias=*/false,
-                                        /*isArtificial=*/true));
+          scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Artificial));
         }
       }
     }
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 748668c..a197fcb 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -485,14 +485,15 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
         if(isChain && OpN->getOpcode() == ISD::TokenFactor)
           OpLatency = 0;
 
-        const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
-                               OpLatency, PhysReg);
+        SDep Dep = isChain ? SDep(OpSU, SDep::Barrier)
+          : SDep(OpSU, SDep::Data, PhysReg);
+        Dep.setLatency(OpLatency);
         if (!isChain && !UnitLatencies) {
-          computeOperandLatency(OpN, N, i, const_cast<SDep &>(dep));
-          ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep));
+          computeOperandLatency(OpN, N, i, Dep);
+          ST.adjustSchedDependency(OpSU, SU, Dep);
         }
 
-        if (!SU->addPred(dep) && !dep.isCtrl() && OpSU->NumRegDefsLeft > 1) {
+        if (!SU->addPred(Dep) && !Dep.isCtrl() && OpSU->NumRegDefsLeft > 1) {
           // Multiple register uses are combined in the same SUnit. For example,
           // we could have a set of glued nodes with all their defs consumed by
           // another set of glued nodes. Register pressure tracking sees this as
@@ -643,6 +644,7 @@ void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use,
 }
 
 void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   if (!SU->getNode()) {
     dbgs() << "PHYS REG COPY\n";
     return;
@@ -659,8 +661,10 @@ void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
     dbgs() << "\n";
     GluedNodes.pop_back();
   }
+#endif
 }
 
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void ScheduleDAGSDNodes::dumpSchedule() const {
   for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
     if (SUnit *SU = Sequence[i])
@@ -669,6 +673,7 @@ void ScheduleDAGSDNodes::dumpSchedule() const {
       dbgs() << "**** NOOP ****\n";
   }
 }
+#endif
 
 #ifndef NDEBUG
 /// VerifyScheduledSequence - Verify that all SUnits were scheduled and that
@@ -827,8 +832,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
     }
 
     SmallVector<SDNode *, 4> GluedNodes;
-    for (SDNode *N = SU->getNode()->getGluedNode(); N;
-         N = N->getGluedNode())
+    for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode())
       GluedNodes.push_back(N);
     while (!GluedNodes.empty()) {
       SDNode *N = GluedNodes.back();
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 84e41fc..907356f 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -114,7 +114,8 @@ namespace llvm {
     /// EmitSchedule - Insert MachineInstrs into the MachineBasicBlock
     /// according to the order specified in Sequence.
     ///
-    MachineBasicBlock *EmitSchedule(MachineBasicBlock::iterator &InsertPos);
+    virtual MachineBasicBlock*
+    EmitSchedule(MachineBasicBlock::iterator &InsertPos);
 
     virtual void dumpNode(const SUnit *SU) const;
 
@@ -158,6 +159,12 @@ namespace llvm {
       void InitNodeNumDefs();
     };
 
+  protected:
+    /// ForceUnitLatencies - Return true if all scheduling edges should be given
+    /// a latency value of one.  The default is to return false; schedulers may
+    /// override this as needed.
+    virtual bool forceUnitLatencies() const { return false; }
+
   private:
     /// ClusterNeighboringLoads - Cluster loads from "near" addresses into
     /// combined SUnits.
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
index c851291..30f03ac 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -25,7 +25,7 @@
 #include "llvm/CodeGen/SchedulerRegistry.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index f4fe892..f000ce3 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -29,7 +29,7 @@
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetSelectionDAGInfo.h"
 #include "llvm/Target/TargetOptions.h"
@@ -91,11 +91,6 @@ bool ConstantFPSDNode::isValueValidForType(EVT VT,
                                            const APFloat& Val) {
   assert(VT.isFloatingPoint() && "Can only convert between FP types");
 
-  // PPC long double cannot be converted to any other type.
-  if (VT == MVT::ppcf128 ||
-      &Val.getSemantics() == &APFloat::PPCDoubleDouble)
-    return false;
-
   // convert modifies in place, so make a copy.
   APFloat Val2 = APFloat(Val);
   bool losesInfo;
@@ -136,13 +131,11 @@ bool ISD::isBuildVectorAllOnes(const SDNode *N) {
   // constants are.
   SDValue NotZero = N->getOperand(i);
   unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits();
-  if (isa<ConstantSDNode>(NotZero)) {
-    if (cast<ConstantSDNode>(NotZero)->getAPIntValue().countTrailingOnes() <
-        EltSize)
+  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(NotZero)) {
+    if (CN->getAPIntValue().countTrailingOnes() < EltSize)
       return false;
-  } else if (isa<ConstantFPSDNode>(NotZero)) {
-    if (cast<ConstantFPSDNode>(NotZero)->getValueAPF()
-              .bitcastToAPInt().countTrailingOnes() < EltSize)
+  } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(NotZero)) {
+    if (CFPN->getValueAPF().bitcastToAPInt().countTrailingOnes() < EltSize)
       return false;
   } else
     return false;
@@ -179,11 +172,11 @@ bool ISD::isBuildVectorAllZeros(const SDNode *N) {
   // Do not accept build_vectors that aren't all constants or which have non-0
   // elements.
   SDValue Zero = N->getOperand(i);
-  if (isa<ConstantSDNode>(Zero)) {
-    if (!cast<ConstantSDNode>(Zero)->isNullValue())
+  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Zero)) {
+    if (!CN->isNullValue())
       return false;
-  } else if (isa<ConstantFPSDNode>(Zero)) {
-    if (!cast<ConstantFPSDNode>(Zero)->getValueAPF().isPosZero())
+  } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(Zero)) {
+    if (!CFPN->getValueAPF().isPosZero())
       return false;
   } else
     return false;
@@ -494,8 +487,10 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
   }
   case ISD::TargetBlockAddress:
   case ISD::BlockAddress: {
-    ID.AddPointer(cast<BlockAddressSDNode>(N)->getBlockAddress());
-    ID.AddInteger(cast<BlockAddressSDNode>(N)->getTargetFlags());
+    const BlockAddressSDNode *BA = cast<BlockAddressSDNode>(N);
+    ID.AddPointer(BA->getBlockAddress());
+    ID.AddInteger(BA->getOffset());
+    ID.AddInteger(BA->getTargetFlags());
     break;
   }
   } // end switch (N->getOpcode())
@@ -883,7 +878,7 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
                    PointerType::get(Type::getInt8Ty(*getContext()), 0) :
                    VT.getTypeForEVT(*getContext());
 
-  return TLI.getTargetData()->getABITypeAlignment(Ty);
+  return TLI.getDataLayout()->getABITypeAlignment(Ty);
 }
 
 // EntryNode could meaningfully have debug info if we can find it...
@@ -1097,10 +1092,9 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL,
          "Cannot set target flags on target-independent globals");
 
   // Truncate (with sign-extension) the offset value to the pointer size.
-  EVT PTy = TLI.getPointerTy();
-  unsigned BitWidth = PTy.getSizeInBits();
+  unsigned BitWidth = TLI.getPointerTy().getSizeInBits();
   if (BitWidth < 64)
-    Offset = (Offset << (64 - BitWidth) >> (64 - BitWidth));
+    Offset = SignExtend64(Offset, BitWidth);
 
   const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
   if (!GVar) {
@@ -1174,7 +1168,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
   assert((TargetFlags == 0 || isTarget) &&
          "Cannot set target flags on target-independent globals");
   if (Alignment == 0)
-    Alignment = TLI.getTargetData()->getPrefTypeAlignment(C->getType());
+    Alignment = TLI.getDataLayout()->getPrefTypeAlignment(C->getType());
   unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
   FoldingSetNodeID ID;
   AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
@@ -1201,7 +1195,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
   assert((TargetFlags == 0 || isTarget) &&
          "Cannot set target flags on target-independent globals");
   if (Alignment == 0)
-    Alignment = TLI.getTargetData()->getPrefTypeAlignment(C->getType());
+    Alignment = TLI.getDataLayout()->getPrefTypeAlignment(C->getType());
   unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
   FoldingSetNodeID ID;
   AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
@@ -1471,6 +1465,7 @@ SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) {
 
 
 SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT,
+                                      int64_t Offset,
                                       bool isTarget,
                                       unsigned char TargetFlags) {
   unsigned Opc = isTarget ? ISD::TargetBlockAddress : ISD::BlockAddress;
@@ -1478,12 +1473,14 @@ SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT,
   FoldingSetNodeID ID;
   AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
   ID.AddPointer(BA);
+  ID.AddInteger(Offset);
   ID.AddInteger(TargetFlags);
   void *IP = 0;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
 
-  SDNode *N = new (NodeAllocator) BlockAddressSDNode(Opc, VT, BA, TargetFlags);
+  SDNode *N = new (NodeAllocator) BlockAddressSDNode(Opc, VT, BA, Offset,
+                                                     TargetFlags);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
@@ -1542,7 +1539,7 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {
   unsigned ByteSize = VT.getStoreSize();
   Type *Ty = VT.getTypeForEVT(*getContext());
   unsigned StackAlign =
-  std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), minAlign);
+  std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty), minAlign);
 
   int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false);
   return getFrameIndex(FrameIdx, TLI.getPointerTy());
@@ -1555,7 +1552,7 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
                             VT2.getStoreSizeInBits())/8;
   Type *Ty1 = VT1.getTypeForEVT(*getContext());
   Type *Ty2 = VT2.getTypeForEVT(*getContext());
-  const TargetData *TD = TLI.getTargetData();
+  const DataLayout *TD = TLI.getDataLayout();
   unsigned Align = std::max(TD->getPrefTypeAlignment(Ty1),
                             TD->getPrefTypeAlignment(Ty2));
 
@@ -1610,10 +1607,6 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1,
   }
   if (ConstantFPSDNode *N1C = dyn_cast<ConstantFPSDNode>(N1.getNode())) {
     if (ConstantFPSDNode *N2C = dyn_cast<ConstantFPSDNode>(N2.getNode())) {
-      // No compile time operations on this type yet.
-      if (N1C->getValueType(0) == MVT::ppcf128)
-        return SDValue();
-
       APFloat::cmpResult R = N1C->getValueAPF().compare(N2C->getValueAPF());
       switch (Cond) {
       default: break;
@@ -2445,8 +2438,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
       return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT);
     case ISD::UINT_TO_FP:
     case ISD::SINT_TO_FP: {
-      // No compile time operations on ppcf128.
-      if (VT == MVT::ppcf128) break;
       APFloat apf(APInt::getNullValue(VT.getSizeInBits()));
       (void)apf.convertFromAPInt(Val,
                                  Opcode==ISD::SINT_TO_FP,
@@ -2455,9 +2446,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
     }
     case ISD::BITCAST:
       if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
-        return getConstantFP(Val.bitsToFloat(), VT);
+        return getConstantFP(APFloat(Val), VT);
       else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
-        return getConstantFP(Val.bitsToDouble(), VT);
+        return getConstantFP(APFloat(Val), VT);
       break;
     case ISD::BSWAP:
       return getConstant(Val.byteSwap(), VT);
@@ -2475,61 +2466,59 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
   // Constant fold unary operations with a floating point constant operand.
   if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Operand.getNode())) {
     APFloat V = C->getValueAPF();    // make copy
-    if (VT != MVT::ppcf128 && Operand.getValueType() != MVT::ppcf128) {
-      switch (Opcode) {
-      case ISD::FNEG:
-        V.changeSign();
+    switch (Opcode) {
+    case ISD::FNEG:
+      V.changeSign();
+      return getConstantFP(V, VT);
+    case ISD::FABS:
+      V.clearSign();
+      return getConstantFP(V, VT);
+    case ISD::FCEIL: {
+      APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive);
+      if (fs == APFloat::opOK || fs == APFloat::opInexact)
         return getConstantFP(V, VT);
-      case ISD::FABS:
-        V.clearSign();
+      break;
+    }
+    case ISD::FTRUNC: {
+      APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero);
+      if (fs == APFloat::opOK || fs == APFloat::opInexact)
         return getConstantFP(V, VT);
-      case ISD::FCEIL: {
-        APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive);
-        if (fs == APFloat::opOK || fs == APFloat::opInexact)
-          return getConstantFP(V, VT);
-        break;
-      }
-      case ISD::FTRUNC: {
-        APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero);
-        if (fs == APFloat::opOK || fs == APFloat::opInexact)
-          return getConstantFP(V, VT);
-        break;
-      }
-      case ISD::FFLOOR: {
-        APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative);
-        if (fs == APFloat::opOK || fs == APFloat::opInexact)
-          return getConstantFP(V, VT);
-        break;
-      }
-      case ISD::FP_EXTEND: {
-        bool ignored;
-        // This can return overflow, underflow, or inexact; we don't care.
-        // FIXME need to be more flexible about rounding mode.
-        (void)V.convert(*EVTToAPFloatSemantics(VT),
-                        APFloat::rmNearestTiesToEven, &ignored);
+      break;
+    }
+    case ISD::FFLOOR: {
+      APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative);
+      if (fs == APFloat::opOK || fs == APFloat::opInexact)
         return getConstantFP(V, VT);
-      }
-      case ISD::FP_TO_SINT:
-      case ISD::FP_TO_UINT: {
-        integerPart x[2];
-        bool ignored;
-        assert(integerPartWidth >= 64);
-        // FIXME need to be more flexible about rounding mode.
-        APFloat::opStatus s = V.convertToInteger(x, VT.getSizeInBits(),
-                              Opcode==ISD::FP_TO_SINT,
-                              APFloat::rmTowardZero, &ignored);
-        if (s==APFloat::opInvalidOp)     // inexact is OK, in fact usual
-          break;
-        APInt api(VT.getSizeInBits(), x);
-        return getConstant(api, VT);
-      }
-      case ISD::BITCAST:
-        if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
-          return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT);
-        else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
-          return getConstant(V.bitcastToAPInt().getZExtValue(), VT);
+      break;
+    }
+    case ISD::FP_EXTEND: {
+      bool ignored;
+      // This can return overflow, underflow, or inexact; we don't care.
+      // FIXME need to be more flexible about rounding mode.
+      (void)V.convert(*EVTToAPFloatSemantics(VT),
+                      APFloat::rmNearestTiesToEven, &ignored);
+      return getConstantFP(V, VT);
+    }
+    case ISD::FP_TO_SINT:
+    case ISD::FP_TO_UINT: {
+      integerPart x[2];
+      bool ignored;
+      assert(integerPartWidth >= 64);
+      // FIXME need to be more flexible about rounding mode.
+      APFloat::opStatus s = V.convertToInteger(x, VT.getSizeInBits(),
+                            Opcode==ISD::FP_TO_SINT,
+                            APFloat::rmTowardZero, &ignored);
+      if (s==APFloat::opInvalidOp)     // inexact is OK, in fact usual
         break;
-      }
+      APInt api(VT.getSizeInBits(), x);
+      return getConstant(api, VT);
+    }
+    case ISD::BITCAST:
+      if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
+        return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT);
+      else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
+        return getConstant(V.bitcastToAPInt().getZExtValue(), VT);
+      break;
     }
   }
 
@@ -2817,6 +2806,24 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
         if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2))
           if (CFP->getValueAPF().isZero())
             return N1;
+      } else if (Opcode == ISD::FMUL) {
+        ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1);
+        SDValue V = N2;
+
+        // If the first operand isn't the constant, try the second
+        if (!CFP) {
+          CFP = dyn_cast<ConstantFPSDNode>(N2);
+          V = N1;
+        }
+
+        if (CFP) {
+          // 0*x --> 0
+          if (CFP->isZero())
+            return SDValue(CFP,0);
+          // 1*x --> x
+          if (CFP->isExactlyValue(1.0))
+            return V;
+        }
       }
     }
     assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
@@ -2935,17 +2942,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
     // expanding large vector constants.
     if (N2C && N1.getOpcode() == ISD::BUILD_VECTOR) {
       SDValue Elt = N1.getOperand(N2C->getZExtValue());
-      EVT VEltTy = N1.getValueType().getVectorElementType();
-      if (Elt.getValueType() != VEltTy) {
+
+      if (VT != Elt.getValueType())
         // If the vector element type is not legal, the BUILD_VECTOR operands
-        // are promoted and implicitly truncated.  Make that explicit here.
-        Elt = getNode(ISD::TRUNCATE, DL, VEltTy, Elt);
-      }
-      if (VT != VEltTy) {
-        // If the vector element type is not legal, the EXTRACT_VECTOR_ELT
-        // result is implicitly extended.
-        Elt = getNode(ISD::ANY_EXTEND, DL, VT, Elt);
-      }
+        // are promoted and implicitly truncated, and the result implicitly
+        // extended. Make that explicit here.
+        Elt = getAnyExtOrTrunc(Elt, DL, VT);
+
       return Elt;
     }
 
@@ -3036,7 +3039,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
       // Cannonicalize constant to RHS if commutative
       std::swap(N1CFP, N2CFP);
       std::swap(N1, N2);
-    } else if (N2CFP && VT != MVT::ppcf128) {
+    } else if (N2CFP) {
       APFloat V1 = N1CFP->getValueAPF(), V2 = N2CFP->getValueAPF();
       APFloat::opStatus s;
       switch (Opcode) {
@@ -3435,7 +3438,7 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
                                    DAG.getMachineFunction());
 
   if (VT == MVT::Other) {
-    if (DstAlign >= TLI.getTargetData()->getPointerPrefAlignment() ||
+    if (DstAlign >= TLI.getDataLayout()->getPointerPrefAlignment() ||
         TLI.allowsUnalignedMemoryAccesses(VT)) {
       VT = TLI.getPointerTy();
     } else {
@@ -3503,7 +3506,9 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
   bool DstAlignCanChange = false;
   MachineFunction &MF = DAG.getMachineFunction();
   MachineFrameInfo *MFI = MF.getFrameInfo();
-  bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+  bool OptSize =
+    MF.getFunction()->getFnAttributes().
+      hasAttribute(Attributes::OptimizeForSize);
   FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
   if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
     DstAlignCanChange = true;
@@ -3523,7 +3528,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
 
   if (DstAlignCanChange) {
     Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
-    unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
+    unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty);
     if (NewAlign > Align) {
       // Give the stack frame object a larger alignment if needed.
       if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
@@ -3596,7 +3601,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
   bool DstAlignCanChange = false;
   MachineFunction &MF = DAG.getMachineFunction();
   MachineFrameInfo *MFI = MF.getFrameInfo();
-  bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+  bool OptSize = MF.getFunction()->getFnAttributes().
+    hasAttribute(Attributes::OptimizeForSize);
   FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
   if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
     DstAlignCanChange = true;
@@ -3612,7 +3618,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
 
   if (DstAlignCanChange) {
     Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
-    unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
+    unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty);
     if (NewAlign > Align) {
       // Give the stack frame object a larger alignment if needed.
       if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
@@ -3674,7 +3680,8 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
   bool DstAlignCanChange = false;
   MachineFunction &MF = DAG.getMachineFunction();
   MachineFrameInfo *MFI = MF.getFrameInfo();
-  bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+  bool OptSize = MF.getFunction()->getFnAttributes().
+    hasAttribute(Attributes::OptimizeForSize);
   FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
   if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
     DstAlignCanChange = true;
@@ -3687,7 +3694,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
 
   if (DstAlignCanChange) {
     Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
-    unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
+    unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty);
     if (NewAlign > Align) {
       // Give the stack frame object a larger alignment if needed.
       if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
@@ -3781,7 +3788,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
   // Emit a library call.
   TargetLowering::ArgListTy Args;
   TargetLowering::ArgListEntry Entry;
-  Entry.Ty = TLI.getTargetData()->getIntPtrType(*getContext());
+  Entry.Ty = TLI.getDataLayout()->getIntPtrType(*getContext());
   Entry.Node = Dst; Args.push_back(Entry);
   Entry.Node = Src; Args.push_back(Entry);
   Entry.Node = Size; Args.push_back(Entry);
@@ -3836,7 +3843,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
   // Emit a library call.
   TargetLowering::ArgListTy Args;
   TargetLowering::ArgListEntry Entry;
-  Entry.Ty = TLI.getTargetData()->getIntPtrType(*getContext());
+  Entry.Ty = TLI.getDataLayout()->getIntPtrType(*getContext());
   Entry.Node = Dst; Args.push_back(Entry);
   Entry.Node = Src; Args.push_back(Entry);
   Entry.Node = Size; Args.push_back(Entry);
@@ -3885,7 +3892,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
     return Result;
 
   // Emit a library call.
-  Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*getContext());
+  Type *IntPtrTy = TLI.getDataLayout()->getIntPtrType(*getContext());
   TargetLowering::ArgListTy Args;
   TargetLowering::ArgListEntry Entry;
   Entry.Node = Dst; Entry.Ty = IntPtrTy;
@@ -3923,17 +3930,21 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
                                 SDValue Swp, MachinePointerInfo PtrInfo,
                                 unsigned Alignment,
                                 AtomicOrdering Ordering,
-                                SynchronizationScope SynchScope) {                                
+                                SynchronizationScope SynchScope) {
   if (Alignment == 0)  // Ensure that codegen never sees alignment 0
     Alignment = getEVTAlignment(MemVT);
 
   MachineFunction &MF = getMachineFunction();
-  unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
 
+  // All atomics are load and store, except for ATMOIC_LOAD and ATOMIC_STORE.
   // For now, atomics are considered to be volatile always.
   // FIXME: Volatile isn't really correct; we should keep track of atomic
   // orderings in the memoperand.
-  Flags |= MachineMemOperand::MOVolatile;
+  unsigned Flags = MachineMemOperand::MOVolatile;
+  if (Opcode != ISD::ATOMIC_STORE)
+    Flags |= MachineMemOperand::MOLoad;
+  if (Opcode != ISD::ATOMIC_LOAD)
+    Flags |= MachineMemOperand::MOStore;
 
   MachineMemOperand *MMO =
     MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment);
@@ -3983,17 +3994,17 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
     Alignment = getEVTAlignment(MemVT);
 
   MachineFunction &MF = getMachineFunction();
-  // A monotonic store does not load; a release store "loads" in the sense
-  // that other stores cannot be sunk past it.
+  // An atomic store does not load. An atomic load does not store.
   // (An atomicrmw obviously both loads and stores.)
-  unsigned Flags = MachineMemOperand::MOStore;
-  if (Opcode != ISD::ATOMIC_STORE || Ordering > Monotonic)
-    Flags |= MachineMemOperand::MOLoad;
-
-  // For now, atomics are considered to be volatile always.
+  // For now, atomics are considered to be volatile always, and they are
+  // chained as such.
   // FIXME: Volatile isn't really correct; we should keep track of atomic
   // orderings in the memoperand.
-  Flags |= MachineMemOperand::MOVolatile;
+  unsigned Flags = MachineMemOperand::MOVolatile;
+  if (Opcode != ISD::ATOMIC_STORE)
+    Flags |= MachineMemOperand::MOLoad;
+  if (Opcode != ISD::ATOMIC_LOAD)
+    Flags |= MachineMemOperand::MOStore;
 
   MachineMemOperand *MMO =
     MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags,
@@ -4056,16 +4067,17 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
     Alignment = getEVTAlignment(MemVT);
 
   MachineFunction &MF = getMachineFunction();
-  // A monotonic load does not store; an acquire load "stores" in the sense
-  // that other loads cannot be hoisted past it.
-  unsigned Flags = MachineMemOperand::MOLoad;
-  if (Ordering > Monotonic)
-    Flags |= MachineMemOperand::MOStore;
-
-  // For now, atomics are considered to be volatile always.
+  // An atomic store does not load. An atomic load does not store.
+  // (An atomicrmw obviously both loads and stores.)
+  // For now, atomics are considered to be volatile always, and they are
+  // chained as such.
   // FIXME: Volatile isn't really correct; we should keep track of atomic
   // orderings in the memoperand.
-  Flags |= MachineMemOperand::MOVolatile;
+  unsigned Flags = MachineMemOperand::MOVolatile;
+  if (Opcode != ISD::ATOMIC_STORE)
+    Flags |= MachineMemOperand::MOLoad;
+  if (Opcode != ISD::ATOMIC_LOAD)
+    Flags |= MachineMemOperand::MOStore;
 
   MachineMemOperand *MMO =
     MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags,
@@ -4157,6 +4169,8 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
   assert((Opcode == ISD::INTRINSIC_VOID ||
           Opcode == ISD::INTRINSIC_W_CHAIN ||
           Opcode == ISD::PREFETCH ||
+          Opcode == ISD::LIFETIME_START ||
+          Opcode == ISD::LIFETIME_END ||
           (Opcode <= INT_MAX &&
            (int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) &&
          "Opcode is not a memory-accessing opcode!");
@@ -4226,7 +4240,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
                       bool isVolatile, bool isNonTemporal, bool isInvariant,
                       unsigned Alignment, const MDNode *TBAAInfo,
                       const MDNode *Ranges) {
-  assert(Chain.getValueType() == MVT::Other && 
+  assert(Chain.getValueType() == MVT::Other &&
         "Invalid chain type");
   if (Alignment == 0)  // Ensure that codegen never sees alignment 0
     Alignment = getEVTAlignment(VT);
@@ -4284,7 +4298,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
   AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3);
   ID.AddInteger(MemVT.getRawBits());
   ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile(),
-                                     MMO->isNonTemporal(), 
+                                     MMO->isNonTemporal(),
                                      MMO->isInvariant()));
   ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
   void *IP = 0;
@@ -4303,7 +4317,7 @@ SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl,
                               SDValue Chain, SDValue Ptr,
                               MachinePointerInfo PtrInfo,
                               bool isVolatile, bool isNonTemporal,
-                              bool isInvariant, unsigned Alignment, 
+                              bool isInvariant, unsigned Alignment,
                               const MDNode *TBAAInfo,
                               const MDNode *Ranges) {
   SDValue Undef = getUNDEF(Ptr.getValueType());
@@ -4332,7 +4346,7 @@ SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base,
          "Load is already a indexed load!");
   return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl,
                  LD->getChain(), Base, Offset, LD->getPointerInfo(),
-                 LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), 
+                 LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(),
                  false, LD->getAlignment());
 }
 
@@ -4340,7 +4354,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
                                SDValue Ptr, MachinePointerInfo PtrInfo,
                                bool isVolatile, bool isNonTemporal,
                                unsigned Alignment, const MDNode *TBAAInfo) {
-  assert(Chain.getValueType() == MVT::Other && 
+  assert(Chain.getValueType() == MVT::Other &&
         "Invalid chain type");
   if (Alignment == 0)  // Ensure that codegen never sees alignment 0
     Alignment = getEVTAlignment(Val.getValueType());
@@ -4365,7 +4379,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
 
 SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
                                SDValue Ptr, MachineMemOperand *MMO) {
-  assert(Chain.getValueType() == MVT::Other && 
+  assert(Chain.getValueType() == MVT::Other &&
         "Invalid chain type");
   EVT VT = Val.getValueType();
   SDVTList VTs = getVTList(MVT::Other);
@@ -4394,7 +4408,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
                                     EVT SVT,bool isVolatile, bool isNonTemporal,
                                     unsigned Alignment,
                                     const MDNode *TBAAInfo) {
-  assert(Chain.getValueType() == MVT::Other && 
+  assert(Chain.getValueType() == MVT::Other &&
         "Invalid chain type");
   if (Alignment == 0)  // Ensure that codegen never sees alignment 0
     Alignment = getEVTAlignment(SVT);
@@ -4421,7 +4435,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
                                     MachineMemOperand *MMO) {
   EVT VT = Val.getValueType();
 
-  assert(Chain.getValueType() == MVT::Other && 
+  assert(Chain.getValueType() == MVT::Other &&
         "Invalid chain type");
   if (VT == SVT)
     return getStore(Chain, dl, Val, Ptr, MMO);
@@ -6074,7 +6088,7 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
     unsigned PtrWidth = TLI.getPointerTy().getSizeInBits();
     APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0);
     llvm::ComputeMaskedBits(const_cast<GlobalValue*>(GV), KnownZero, KnownOne,
-                            TLI.getTargetData());
+                            TLI.getDataLayout());
     unsigned AlignBits = KnownZero.countTrailingOnes();
     unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0;
     if (Align)
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index ba5bd79..3fbf7c2 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -19,6 +19,7 @@
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Constants.h"
 #include "llvm/CallingConv.h"
 #include "llvm/DebugInfo.h"
@@ -43,7 +44,7 @@
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetIntrinsicInfo.h"
@@ -88,7 +89,7 @@ static const unsigned MaxParallelChains = 64;
 
 static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
                                       const SDValue *Parts, unsigned NumParts,
-                                      EVT PartVT, EVT ValueVT);
+                                      EVT PartVT, EVT ValueVT, const Value *V);
 
 /// getCopyFromParts - Create a value that contains the specified legal parts
 /// combined into the value they represent.  If the parts combine to a type
@@ -98,9 +99,11 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
 static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
                                 const SDValue *Parts,
                                 unsigned NumParts, EVT PartVT, EVT ValueVT,
+                                const Value *V,
                                 ISD::NodeType AssertOp = ISD::DELETED_NODE) {
   if (ValueVT.isVector())
-    return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT);
+    return getCopyFromPartsVector(DAG, DL, Parts, NumParts,
+                                  PartVT, ValueVT, V);
 
   assert(NumParts > 0 && "No parts to assemble!");
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -124,9 +127,9 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
 
       if (RoundParts > 2) {
         Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2,
-                              PartVT, HalfVT);
+                              PartVT, HalfVT, V);
         Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2,
-                              RoundParts / 2, PartVT, HalfVT);
+                              RoundParts / 2, PartVT, HalfVT, V);
       } else {
         Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]);
         Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]);
@@ -142,7 +145,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
         unsigned OddParts = NumParts - RoundParts;
         EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
         Hi = getCopyFromParts(DAG, DL,
-                              Parts + RoundParts, OddParts, PartVT, OddVT);
+                              Parts + RoundParts, OddParts, PartVT, OddVT, V);
 
         // Combine the round and odd parts.
         Lo = Val;
@@ -171,7 +174,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
       assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
              !PartVT.isVector() && "Unexpected split");
       EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
-      Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT);
+      Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V);
     }
   }
 
@@ -209,14 +212,14 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
   llvm_unreachable("Unknown mismatch!");
 }
 
-/// getCopyFromParts - Create a value that contains the specified legal parts
-/// combined into the value they represent.  If the parts combine to a type
-/// larger then ValueVT then AssertOp can be used to specify whether the extra
-/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
-/// (ISD::AssertSext).
+/// getCopyFromPartsVector - Create a value that contains the specified legal
+/// parts combined into the value they represent.  If the parts combine to a
+/// type larger then ValueVT then AssertOp can be used to specify whether the
+/// extra bits are known to be zero (ISD::AssertZext) or sign extended from
+/// ValueVT (ISD::AssertSext).
 static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
                                       const SDValue *Parts, unsigned NumParts,
-                                      EVT PartVT, EVT ValueVT) {
+                                      EVT PartVT, EVT ValueVT, const Value *V) {
   assert(ValueVT.isVector() && "Not a vector value");
   assert(NumParts > 0 && "No parts to assemble!");
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -242,7 +245,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
       // as appropriate.
       for (unsigned i = 0; i != NumParts; ++i)
         Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1,
-                                  PartVT, IntermediateVT);
+                                  PartVT, IntermediateVT, V);
     } else if (NumParts > 0) {
       // If the intermediate type was expanded, build the intermediate
       // operands from the parts.
@@ -251,7 +254,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
       unsigned Factor = NumParts / NumIntermediates;
       for (unsigned i = 0; i != NumIntermediates; ++i)
         Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor,
-                                  PartVT, IntermediateVT);
+                                  PartVT, IntermediateVT, V);
     }
 
     // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
@@ -299,8 +302,19 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
     return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
 
   // Handle cases such as i8 -> <1 x i1>
-  assert(ValueVT.getVectorNumElements() == 1 &&
-         "Only trivial scalar-to-vector conversions should get here!");
+  if (ValueVT.getVectorNumElements() != 1) {
+    LLVMContext &Ctx = *DAG.getContext();
+    Twine ErrMsg("non-trivial scalar-to-vector conversion");
+    if (const Instruction *I = dyn_cast_or_null<Instruction>(V)) {
+      if (const CallInst *CI = dyn_cast<CallInst>(I))
+        if (isa<InlineAsm>(CI->getCalledValue()))
+          ErrMsg = ErrMsg + ", possible invalid constraint for vector type";
+      Ctx.emitError(I, ErrMsg);
+    } else {
+      Ctx.emitError(ErrMsg);
+    }
+    report_fatal_error("Cannot handle scalar-to-vector conversion!");
+  }
 
   if (ValueVT.getVectorNumElements() == 1 &&
       ValueVT.getVectorElementType() != PartVT) {
@@ -312,25 +326,22 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
   return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val);
 }
 
-
-
-
 static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc dl,
                                  SDValue Val, SDValue *Parts, unsigned NumParts,
-                                 EVT PartVT);
+                                 EVT PartVT, const Value *V);
 
 /// getCopyToParts - Create a series of nodes that contain the specified value
 /// split into legal parts.  If the parts contain more bits than Val, then, for
 /// integers, ExtendKind can be used to specify how to generate the extra bits.
 static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
                            SDValue Val, SDValue *Parts, unsigned NumParts,
-                           EVT PartVT,
+                           EVT PartVT, const Value *V,
                            ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
   EVT ValueVT = Val.getValueType();
 
   // Handle the vector case separately.
   if (ValueVT.isVector())
-    return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT);
+    return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V);
 
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   unsigned PartBits = PartVT.getSizeInBits();
@@ -382,7 +393,19 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
          "Failed to tile the value with PartVT!");
 
   if (NumParts == 1) {
-    assert(PartVT == ValueVT && "Type conversion failed!");
+    if (PartVT != ValueVT) {
+      LLVMContext &Ctx = *DAG.getContext();
+      Twine ErrMsg("scalar-to-vector conversion failed");
+      if (const Instruction *I = dyn_cast_or_null<Instruction>(V)) {
+        if (const CallInst *CI = dyn_cast<CallInst>(I))
+          if (isa<InlineAsm>(CI->getCalledValue()))
+            ErrMsg = ErrMsg + ", possible invalid constraint for vector type";
+        Ctx.emitError(I, ErrMsg);
+      } else {
+        Ctx.emitError(ErrMsg);
+      }
+    }
+
     Parts[0] = Val;
     return;
   }
@@ -397,7 +420,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
     unsigned OddParts = NumParts - RoundParts;
     SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
                                  DAG.getIntPtrConstant(RoundBits));
-    getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT);
+    getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V);
 
     if (TLI.isBigEndian())
       // The odd parts were reversed by getCopyToParts - unreverse them.
@@ -443,7 +466,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
 /// value split into legal parts.
 static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
                                  SDValue Val, SDValue *Parts, unsigned NumParts,
-                                 EVT PartVT) {
+                                 EVT PartVT, const Value *V) {
   EVT ValueVT = Val.getValueType();
   assert(ValueVT.isVector() && "Not a vector");
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -529,7 +552,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
     // If the register was not expanded, promote or copy the value,
     // as appropriate.
     for (unsigned i = 0; i != NumParts; ++i)
-      getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT);
+      getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V);
   } else if (NumParts > 0) {
     // If the intermediate type was expanded, split each the value into
     // legal parts.
@@ -537,13 +560,10 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
            "Must expand into a divisible number of parts!");
     unsigned Factor = NumParts / NumIntermediates;
     for (unsigned i = 0; i != NumIntermediates; ++i)
-      getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT);
+      getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT, V);
   }
 }
 
-
-
-
 namespace {
   /// RegsForValue - This struct represents the registers (physical or virtual)
   /// that a particular set of values is assigned, and the type information
@@ -621,14 +641,15 @@ namespace {
     /// If the Flag pointer is NULL, no flag is used.
     SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo,
                             DebugLoc dl,
-                            SDValue &Chain, SDValue *Flag) const;
+                            SDValue &Chain, SDValue *Flag,
+                            const Value *V = 0) const;
 
     /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
     /// specified value into the registers specified by this object.  This uses
     /// Chain/Flag as the input and updates them for the output Chain/Flag.
     /// If the Flag pointer is NULL, no flag is used.
     void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
-                       SDValue &Chain, SDValue *Flag) const;
+                       SDValue &Chain, SDValue *Flag, const Value *V) const;
 
     /// AddInlineAsmOperands - Add this value to the specified inlineasm node
     /// operand list.  This adds the code marker, matching input operand index
@@ -647,7 +668,8 @@ namespace {
 SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
                                       FunctionLoweringInfo &FuncInfo,
                                       DebugLoc dl,
-                                      SDValue &Chain, SDValue *Flag) const {
+                                      SDValue &Chain, SDValue *Flag,
+                                      const Value *V) const {
   // A Value with type {} or [0 x %t] needs no registers.
   if (ValueVTs.empty())
     return SDValue();
@@ -721,7 +743,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
     }
 
     Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(),
-                                     NumRegs, RegisterVT, ValueVT);
+                                     NumRegs, RegisterVT, ValueVT, V);
     Part += NumRegs;
     Parts.clear();
   }
@@ -736,7 +758,8 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
 /// Chain/Flag as the input and updates them for the output Chain/Flag.
 /// If the Flag pointer is NULL, no flag is used.
 void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
-                                 SDValue &Chain, SDValue *Flag) const {
+                                 SDValue &Chain, SDValue *Flag,
+                                 const Value *V) const {
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
 
   // Get the list of the values's legal parts.
@@ -748,7 +771,7 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
     EVT RegisterVT = RegVTs[Value];
 
     getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value),
-                   &Parts[Part], NumParts, RegisterVT);
+                   &Parts[Part], NumParts, RegisterVT, V);
     Part += NumParts;
   }
 
@@ -824,7 +847,8 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa,
   AA = &aa;
   GFI = gfi;
   LibInfo = li;
-  TD = DAG.getTarget().getTargetData();
+  TD = DAG.getTarget().getDataLayout();
+  Context = DAG.getContext();
   LPadToCallSiteMap.clear();
 }
 
@@ -992,7 +1016,7 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) {
     unsigned InReg = It->second;
     RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType());
     SDValue Chain = DAG.getEntryNode();
-    N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL);
+    N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL, V);
     resolveDanglingDebugInfo(V, N);
     return N;
   }
@@ -1147,7 +1171,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
     unsigned InReg = FuncInfo.InitializeRegForValue(Inst);
     RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType());
     SDValue Chain = DAG.getEntryNode();
-    return RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL);
+    return RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL, V);
   }
 
   llvm_unreachable("Can't get register for value!");
@@ -1203,9 +1227,9 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
         ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
 
         const Function *F = I.getParent()->getParent();
-        if (F->paramHasAttr(0, Attribute::SExt))
+        if (F->getRetAttributes().hasAttribute(Attributes::SExt))
           ExtendKind = ISD::SIGN_EXTEND;
-        else if (F->paramHasAttr(0, Attribute::ZExt))
+        else if (F->getRetAttributes().hasAttribute(Attributes::ZExt))
           ExtendKind = ISD::ZERO_EXTEND;
 
         if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
@@ -1216,11 +1240,11 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
         SmallVector<SDValue, 4> Parts(NumParts);
         getCopyToParts(DAG, getCurDebugLoc(),
                        SDValue(RetOp.getNode(), RetOp.getResNo() + j),
-                       &Parts[0], NumParts, PartVT, ExtendKind);
+                       &Parts[0], NumParts, PartVT, &I, ExtendKind);
 
         // 'inreg' on function refers to return value
         ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
-        if (F->paramHasAttr(0, Attribute::InReg))
+        if (F->getRetAttributes().hasAttribute(Attributes::InReg))
           Flags.setInReg();
 
         // Propagate extension type if any
@@ -1231,7 +1255,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
 
         for (unsigned i = 0; i < NumParts; ++i) {
           Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(),
-                                        /*isfixed=*/true));
+                                        /*isfixed=*/true, 0, 0));
           OutVals.push_back(Parts[i]);
         }
       }
@@ -1601,7 +1625,10 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
 
   // Update successor info
   addSuccessorWithWeight(SwitchBB, CB.TrueBB, CB.TrueWeight);
-  addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight);
+  // TrueBB and FalseBB are always different unless the incoming IR is
+  // degenerate. This only happens when running llc on weird IR.
+  if (CB.TrueBB != CB.FalseBB)
+    addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight);
 
   // Set NextBlock to be the MBB immediately after the current one, if any.
   // This is used to avoid emitting unnecessary branches to the next block.
@@ -1762,6 +1789,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
 /// visitBitTestCase - this function produces one "bit test"
 void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
                                            MachineBasicBlock* NextMBB,
+                                           uint32_t BranchWeightToNext,
                                            unsigned Reg,
                                            BitTestCase &B,
                                            MachineBasicBlock *SwitchBB) {
@@ -1799,8 +1827,10 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
                        ISD::SETNE);
   }
 
-  addSuccessorWithWeight(SwitchBB, B.TargetBB);
-  addSuccessorWithWeight(SwitchBB, NextMBB);
+  // The branch weight from SwitchBB to B.TargetBB is B.ExtraWeight.
+  addSuccessorWithWeight(SwitchBB, B.TargetBB, B.ExtraWeight);
+  // The branch weight from SwitchBB to NextMBB is BranchWeightToNext.
+  addSuccessorWithWeight(SwitchBB, NextMBB, BranchWeightToNext);
 
   SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
                               MVT::Other, getControlRoot(),
@@ -1923,6 +1953,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
   if (++BBI != FuncInfo.MF->end())
     NextBlock = BBI;
 
+  BranchProbabilityInfo *BPI = FuncInfo.BPI;
   // If any two of the cases has the same destination, and if one value
   // is the same as the other, but has one bit unset that the other has set,
   // use bit manipulation to do two compares at once.  For example:
@@ -1956,8 +1987,12 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
                                     ISD::SETEQ);
 
         // Update successor info.
-        addSuccessorWithWeight(SwitchBB, Small.BB);
-        addSuccessorWithWeight(SwitchBB, Default);
+        // Both Small and Big will jump to Small.BB, so we sum up the weights.
+        addSuccessorWithWeight(SwitchBB, Small.BB,
+                               Small.ExtraWeight + Big.ExtraWeight);
+        addSuccessorWithWeight(SwitchBB, Default,
+          // The default destination is the first successor in IR.
+          BPI ? BPI->getEdgeWeight(SwitchBB->getBasicBlock(), (unsigned)0) : 0);
 
         // Insert the true branch.
         SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other,
@@ -1975,14 +2010,13 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
   }
 
   // Order cases by weight so the most likely case will be checked first.
-  BranchProbabilityInfo *BPI = FuncInfo.BPI;
+  uint32_t UnhandledWeights = 0;
   if (BPI) {
     for (CaseItr I = CR.Range.first, IE = CR.Range.second; I != IE; ++I) {
-      uint32_t IWeight = BPI->getEdgeWeight(SwitchBB->getBasicBlock(),
-                                            I->BB->getBasicBlock());
+      uint32_t IWeight = I->ExtraWeight;
+      UnhandledWeights += IWeight;
       for (CaseItr J = CR.Range.first; J < I; ++J) {
-        uint32_t JWeight = BPI->getEdgeWeight(SwitchBB->getBasicBlock(),
-                                              J->BB->getBasicBlock());
+        uint32_t JWeight = J->ExtraWeight;
         if (IWeight > JWeight)
           std::swap(*I, *J);
       }
@@ -2031,10 +2065,12 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
       LHS = I->Low; MHS = SV; RHS = I->High;
     }
 
-    uint32_t ExtraWeight = I->ExtraWeight;
+    // The false weight should be sum of all un-handled cases.
+    UnhandledWeights -= I->ExtraWeight;
     CaseBlock CB(CC, LHS, RHS, MHS, /* truebb */ I->BB, /* falsebb */ FallThrough,
                  /* me */ CurBlock,
-                 /* trueweight */ ExtraWeight / 2, /* falseweight */ ExtraWeight / 2);
+                 /* trueweight */ I->ExtraWeight,
+                 /* falseweight */ UnhandledWeights);
 
     // If emitting the first comparison, just call visitSwitchCase to emit the
     // code into the current block.  Otherwise, push the CaseBlock onto the
@@ -2079,7 +2115,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR,
   for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I)
     TSize += I->size();
 
-  if (!areJTsAllowed(TLI) || TSize.ult(4))
+  if (!areJTsAllowed(TLI) || TSize.ult(TLI.getMinimumJumpTableEntries()))
     return false;
 
   APInt Range = ComputeRange(First, Last);
@@ -2134,13 +2170,28 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR,
     }
   }
 
+  // Calculate weight for each unique destination in CR.
+  DenseMap<MachineBasicBlock*, uint32_t> DestWeights;
+  if (FuncInfo.BPI)
+    for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
+      DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr =
+          DestWeights.find(I->BB);
+      if (Itr != DestWeights.end()) 
+        Itr->second += I->ExtraWeight;
+      else
+        DestWeights[I->BB] = I->ExtraWeight;
+    }
+
   // Update successor info. Add one edge to each unique successor.
   BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs());
   for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(),
          E = DestBBs.end(); I != E; ++I) {
     if (!SuccsHandled[(*I)->getNumber()]) {
       SuccsHandled[(*I)->getNumber()] = true;
-      addSuccessorWithWeight(JumpTableBB, *I);
+      DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr =
+          DestWeights.find(*I);
+      addSuccessorWithWeight(JumpTableBB, *I,
+                             Itr != DestWeights.end() ? Itr->second : 0);
     }
   }
 
@@ -2371,7 +2422,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
 
     if (i == count) {
       assert((count < 3) && "Too much destinations to test!");
-      CasesBits.push_back(CaseBits(0, Dest, 0));
+      CasesBits.push_back(CaseBits(0, Dest, 0, 0/*Weight*/));
       count++;
     }
 
@@ -2380,6 +2431,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
 
     uint64_t lo = (lowValue - lowBound).getZExtValue();
     uint64_t hi = (highValue - lowBound).getZExtValue();
+    CasesBits[i].ExtraWeight += I->ExtraWeight;
 
     for (uint64_t j = lo; j <= hi; j++) {
       CasesBits[i].Mask |=  1ULL << j;
@@ -2407,7 +2459,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
     CurMF->insert(BBI, CaseBB);
     BTC.push_back(BitTestCase(CasesBits[i].Mask,
                               CaseBB,
-                              CasesBits[i].BB));
+                              CasesBits[i].BB, CasesBits[i].ExtraWeight));
 
     // Put SV in a virtual register to make it available from the new blocks.
     ExportFromCurrentBlock(SV);
@@ -2435,30 +2487,25 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
   
   Clusterifier TheClusterifier;
 
+  BranchProbabilityInfo *BPI = FuncInfo.BPI;
   // Start with "simple" cases
   for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end();
        i != e; ++i) {
     const BasicBlock *SuccBB = i.getCaseSuccessor();
     MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB];
 
-    TheClusterifier.add(i.getCaseValueEx(), SMBB);
+    TheClusterifier.add(i.getCaseValueEx(), SMBB, 
+        BPI ? BPI->getEdgeWeight(SI.getParent(), i.getSuccessorIndex()) : 0);
   }
   
   TheClusterifier.optimize();
   
-  BranchProbabilityInfo *BPI = FuncInfo.BPI;
   size_t numCmps = 0;
   for (Clusterifier::RangeIterator i = TheClusterifier.begin(),
        e = TheClusterifier.end(); i != e; ++i, ++numCmps) {
     Clusterifier::Cluster &C = *i;
-    unsigned W = 0;
-    if (BPI) {
-      W = BPI->getEdgeWeight(SI.getParent(), C.second->getBasicBlock());
-      if (!W)
-        W = 16;
-      W *= C.first.Weight;
-      BPI->setEdgeWeight(SI.getParent(), C.second->getBasicBlock(), W);  
-    }
+    // Update edge weight for the cluster.
+    unsigned W = C.first.Weight;
 
     // FIXME: Currently work with ConstantInt based numbers.
     // Changing it to APInt based is a pretty heavy for this commit.
@@ -2540,9 +2587,10 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
     if (handleSmallSwitchRange(CR, WorkList, SV, Default, SwitchMBB))
       continue;
 
-    // If the switch has more than 5 blocks, and at least 40% dense, and the
+    // If the switch has more than N blocks, and is at least 40% dense, and the
     // target supports indirect branches, then emit a jump table rather than
     // lowering the switch to a binary tree of conditional branches.
+    // N defaults to 4 and is controlled via TLS.getMinimumJumpTableEntries().
     if (handleJTSwitchCase(CR, WorkList, SV, Default, SwitchMBB))
       continue;
 
@@ -2556,14 +2604,14 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
   MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB;
 
   // Update machine-CFG edges with unique successors.
-  SmallVector<BasicBlock*, 32> succs;
-  succs.reserve(I.getNumSuccessors());
-  for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i)
-    succs.push_back(I.getSuccessor(i));
-  array_pod_sort(succs.begin(), succs.end());
-  succs.erase(std::unique(succs.begin(), succs.end()), succs.end());
-  for (unsigned i = 0, e = succs.size(); i != e; ++i) {
-    MachineBasicBlock *Succ = FuncInfo.MBBMap[succs[i]];
+  SmallSet<BasicBlock*, 32> Done;
+  for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) {
+    BasicBlock *BB = I.getSuccessor(i);
+    bool Inserted = Done.insert(BB);
+    if (!Inserted)
+        continue;
+
+    MachineBasicBlock *Succ = FuncInfo.MBBMap[BB];
     addSuccessorWithWeight(IndirectBrMBB, Succ);
   }
 
@@ -3160,9 +3208,9 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
     return;   // getValue will auto-populate this.
 
   Type *Ty = I.getAllocatedType();
-  uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
+  uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty);
   unsigned Align =
-    std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
+    std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty),
              I.getAlignment());
 
   SDValue AllocSize = getValue(I.getArraySize());
@@ -3460,7 +3508,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
 
   SDValue InChain = getRoot();
 
-  EVT VT = EVT::getEVT(I.getType());
+  EVT VT = TLI.getValueType(I.getType());
 
   if (I.getAlignment() * 8 < VT.getSizeInBits())
     report_fatal_error("Cannot generate unaligned atomic load");
@@ -3490,7 +3538,7 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
 
   SDValue InChain = getRoot();
 
-  EVT VT = EVT::getEVT(I.getValueOperand()->getType());
+  EVT VT = TLI.getValueType(I.getValueOperand()->getType());
 
   if (I.getAlignment() * 8 < VT.getSizeInBits())
     report_fatal_error("Cannot generate unaligned atomic store");
@@ -4352,7 +4400,7 @@ static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS,
       return DAG.getConstantFP(1.0, LHS.getValueType());
 
     const Function *F = DAG.getMachineFunction().getFunction();
-    if (!F->hasFnAttr(Attribute::OptimizeForSize) ||
+    if (!F->getFnAttributes().hasAttribute(Attributes::OptimizeForSize) ||
         // If optimizing for size, don't insert too many multiplies.  This
         // inserts up to 5 multiplies.
         CountPopulation_32(Val)+Log2_32(Val) < 7) {
@@ -4850,7 +4898,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, DestVT,
                       getValue(I.getArgOperand(0)),
                       getValue(I.getArgOperand(1)),
-                      DAG.getConstant(Idx, MVT::i32));
+                      DAG.getIntPtrConstant(Idx));
+    setValue(&I, Res);
+    return 0;
+  }
+  case Intrinsic::x86_avx_vextractf128_pd_256:
+  case Intrinsic::x86_avx_vextractf128_ps_256:
+  case Intrinsic::x86_avx_vextractf128_si_256:
+  case Intrinsic::x86_avx2_vextracti128: {
+    DebugLoc dl = getCurDebugLoc();
+    EVT DestVT = TLI.getValueType(I.getType());
+    uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(1))->getZExtValue() & 1) *
+                   DestVT.getVectorNumElements();
+    Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT,
+                      getValue(I.getArgOperand(0)),
+                      DAG.getIntPtrConstant(Idx));
     setValue(&I, Res);
     return 0;
   }
@@ -5113,10 +5175,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     return 0;
   }
 
+  case Intrinsic::debugtrap:
   case Intrinsic::trap: {
     StringRef TrapFuncName = TM.Options.getTrapFunctionName();
     if (TrapFuncName.empty()) {
-      DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot()));
+      ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ? 
+        ISD::TRAP : ISD::DEBUGTRAP;
+      DAG.setRoot(DAG.getNode(Op, dl,MVT::Other, getRoot()));
       return 0;
     }
     TargetLowering::ArgListTy Args;
@@ -5131,10 +5196,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     DAG.setRoot(Result.second);
     return 0;
   }
-  case Intrinsic::debugtrap: {
-    DAG.setRoot(DAG.getNode(ISD::DEBUGTRAP, dl,MVT::Other, getRoot()));
-    return 0;
-  }
+
   case Intrinsic::uadd_with_overflow:
   case Intrinsic::sadd_with_overflow:
   case Intrinsic::usub_with_overflow:
@@ -5177,14 +5239,40 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
                                         rw==1)); /* write */
     return 0;
   }
+  case Intrinsic::lifetime_start:
+  case Intrinsic::lifetime_end: {
+    bool IsStart = (Intrinsic == Intrinsic::lifetime_start);
+    // Stack coloring is not enabled in O0, discard region information.
+    if (TM.getOptLevel() == CodeGenOpt::None)
+      return 0;
+
+    SmallVector<Value *, 4> Allocas;
+    GetUnderlyingObjects(I.getArgOperand(1), Allocas, TD);
+
+    for (SmallVector<Value*, 4>::iterator Object = Allocas.begin(),
+         E = Allocas.end(); Object != E; ++Object) {
+      AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(*Object);
 
+      // Could not find an Alloca.
+      if (!LifetimeObject)
+        continue;
+
+      int FI = FuncInfo.StaticAllocaMap[LifetimeObject];
+
+      SDValue Ops[2];
+      Ops[0] = getRoot();
+      Ops[1] = DAG.getFrameIndex(FI, TLI.getPointerTy(), true);
+      unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END);
+
+      Res = DAG.getNode(Opcode, dl, MVT::Other, Ops, 2);
+      DAG.setRoot(Res);
+    }
+  }
   case Intrinsic::invariant_start:
-  case Intrinsic::lifetime_start:
     // Discard region information.
     setValue(&I, DAG.getUNDEF(TLI.getPointerTy()));
     return 0;
   case Intrinsic::invariant_end:
-  case Intrinsic::lifetime_end:
     // Discard region information.
     return 0;
   case Intrinsic::donothing:
@@ -5220,9 +5308,9 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
   int DemoteStackIdx = -100;
 
   if (!CanLowerReturn) {
-    uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(
+    uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(
                       FTy->getReturnType());
-    unsigned Align  = TLI.getTargetData()->getPrefTypeAlignment(
+    unsigned Align  = TLI.getDataLayout()->getPrefTypeAlignment(
                       FTy->getReturnType());
     MachineFunction &MF = DAG.getMachineFunction();
     DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
@@ -5254,12 +5342,12 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
     Entry.Node = ArgNode; Entry.Ty = V->getType();
 
     unsigned attrInd = i - CS.arg_begin() + 1;
-    Entry.isSExt  = CS.paramHasAttr(attrInd, Attribute::SExt);
-    Entry.isZExt  = CS.paramHasAttr(attrInd, Attribute::ZExt);
-    Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg);
-    Entry.isSRet  = CS.paramHasAttr(attrInd, Attribute::StructRet);
-    Entry.isNest  = CS.paramHasAttr(attrInd, Attribute::Nest);
-    Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal);
+    Entry.isSExt  = CS.paramHasAttr(attrInd, Attributes::SExt);
+    Entry.isZExt  = CS.paramHasAttr(attrInd, Attributes::ZExt);
+    Entry.isInReg = CS.paramHasAttr(attrInd, Attributes::InReg);
+    Entry.isSRet  = CS.paramHasAttr(attrInd, Attributes::StructRet);
+    Entry.isNest  = CS.paramHasAttr(attrInd, Attributes::Nest);
+    Entry.isByVal = CS.paramHasAttr(attrInd, Attributes::ByVal);
     Entry.Alignment = CS.getParamAlignment(attrInd);
     Args.push_back(Entry);
   }
@@ -5687,7 +5775,7 @@ public:
   /// MVT::Other.
   EVT getCallOperandValEVT(LLVMContext &Context,
                            const TargetLowering &TLI,
-                           const TargetData *TD) const {
+                           const DataLayout *TD) const {
     if (CallOperandVal == 0) return MVT::Other;
 
     if (isa<BasicBlock>(CallOperandVal))
@@ -5991,8 +6079,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
         // Otherwise, create a stack slot and emit a store to it before the
         // asm.
         Type *Ty = OpVal->getType();
-        uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
-        unsigned Align  = TLI.getTargetData()->getPrefTypeAlignment(Ty);
+        uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty);
+        unsigned Align  = TLI.getDataLayout()->getPrefTypeAlignment(Ty);
         MachineFunction &MF = DAG.getMachineFunction();
         int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
         SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
@@ -6040,12 +6128,36 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
   const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc");
   AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc));
 
-  // Remember the HasSideEffect and AlignStack bits as operand 3.
+  // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore
+  // bits as operand 3.
   unsigned ExtraInfo = 0;
   if (IA->hasSideEffects())
     ExtraInfo |= InlineAsm::Extra_HasSideEffects;
   if (IA->isAlignStack())
     ExtraInfo |= InlineAsm::Extra_IsAlignStack;
+  // Set the asm dialect.
+  ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
+
+  // Determine if this InlineAsm MayLoad or MayStore based on the constraints.
+  for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
+    TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
+
+    // Compute the constraint code and ConstraintType to use.
+    TLI.ComputeConstraintToUse(OpInfo, SDValue());
+
+    // Ideally, we would only check against memory constraints.  However, the
+    // meaning of an other constraint can be target-specific and we can't easily
+    // reason about it.  Therefore, be conservative and set MayLoad/MayStore
+    // for other constriants as well.
+    if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
+        OpInfo.ConstraintType == TargetLowering::C_Other) {
+      if (OpInfo.Type == InlineAsm::isInput)
+        ExtraInfo |= InlineAsm::Extra_MayLoad;
+      else if (OpInfo.Type == InlineAsm::isOutput)
+        ExtraInfo |= InlineAsm::Extra_MayStore;
+    }
+  }
+
   AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo,
                                                   TLI.getPointerTy()));
 
@@ -6155,7 +6267,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
 
           // Use the produced MatchedRegs object to
           MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
-                                    Chain, &Flag);
+                                    Chain, &Flag, CS.getInstruction());
           MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse,
                                            true, OpInfo.getMatchedOperand(),
                                            DAG, AsmNodeOperands);
@@ -6237,7 +6349,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
       }
 
       OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
-                                        Chain, &Flag);
+                                        Chain, &Flag, CS.getInstruction());
 
       OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0,
                                                DAG, AsmNodeOperands);
@@ -6268,7 +6380,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
   // and set it as the value of the call.
   if (!RetValRegs.Regs.empty()) {
     SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(),
-                                             Chain, &Flag);
+                                             Chain, &Flag, CS.getInstruction());
 
     // FIXME: Why don't we do this for inline asms with MRVs?
     if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) {
@@ -6308,7 +6420,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
     RegsForValue &OutRegs = IndirectStoresToEmit[i].first;
     const Value *Ptr = IndirectStoresToEmit[i].second;
     SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(),
-                                             Chain, &Flag);
+                                             Chain, &Flag, IA);
     StoresToEmit.push_back(std::make_pair(OutVal, Ptr));
   }
 
@@ -6338,7 +6450,7 @@ void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
 }
 
 void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
-  const TargetData &TD = *TLI.getTargetData();
+  const DataLayout &TD = *TLI.getDataLayout();
   SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(),
                            getRoot(), getValue(I.getOperand(0)),
                            DAG.getSrcValue(I.getOperand(0)),
@@ -6384,7 +6496,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
                            Args[i].Node.getResNo() + Value);
       ISD::ArgFlagsTy Flags;
       unsigned OriginalAlignment =
-        getTargetData()->getABITypeAlignment(ArgTy);
+        getDataLayout()->getABITypeAlignment(ArgTy);
 
       if (Args[i].isZExt)
         Flags.setZExt();
@@ -6398,7 +6510,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
         Flags.setByVal();
         PointerType *Ty = cast<PointerType>(Args[i].Ty);
         Type *ElementTy = Ty->getElementType();
-        Flags.setByValSize(getTargetData()->getTypeAllocSize(ElementTy));
+        Flags.setByValSize(getDataLayout()->getTypeAllocSize(ElementTy));
         // For ByVal, alignment should come from FE.  BE will guess if this
         // info is not there but there are cases it cannot get right.
         unsigned FrameAlign;
@@ -6423,12 +6535,13 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
         ExtendKind = ISD::ZERO_EXTEND;
 
       getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts,
-                     PartVT, ExtendKind);
+                     PartVT, CLI.CS ? CLI.CS->getInstruction() : 0, ExtendKind);
 
       for (unsigned j = 0; j != NumParts; ++j) {
         // if it isn't first piece, alignment must be 1
         ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(),
-                               i < CLI.NumFixedArgs);
+                               i < CLI.NumFixedArgs,
+                               i, j*Parts[j].getValueType().getStoreSize());
         if (NumParts > 1 && j == 0)
           MyFlags.Flags.setSplit();
         else if (j != 0)
@@ -6504,7 +6617,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
     unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
 
     ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg],
-                                            NumRegs, RegisterVT, VT,
+                                            NumRegs, RegisterVT, VT, NULL,
                                             AssertOp));
     CurReg += NumRegs;
   }
@@ -6543,7 +6656,7 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
 
   RegsForValue RFV(V->getContext(), TLI, Reg, V->getType());
   SDValue Chain = DAG.getEntryNode();
-  RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0);
+  RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0, V);
   PendingExports.push_back(Chain);
 }
 
@@ -6573,7 +6686,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
   const Function &F = *LLVMBB->getParent();
   SelectionDAG &DAG = SDB->DAG;
   DebugLoc dl = SDB->getCurDebugLoc();
-  const TargetData *TD = TLI.getTargetData();
+  const DataLayout *TD = TLI.getDataLayout();
   SmallVector<ISD::InputArg, 16> Ins;
 
   // Check whether the function can return without sret-demotion.
@@ -6591,7 +6704,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
     ISD::ArgFlagsTy Flags;
     Flags.setSRet();
     EVT RegisterVT = TLI.getRegisterType(*DAG.getContext(), ValueVTs[0]);
-    ISD::InputArg RetArg(Flags, RegisterVT, true);
+    ISD::InputArg RetArg(Flags, RegisterVT, true, 0, 0);
     Ins.push_back(RetArg);
   }
 
@@ -6610,15 +6723,15 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
       unsigned OriginalAlignment =
         TD->getABITypeAlignment(ArgTy);
 
-      if (F.paramHasAttr(Idx, Attribute::ZExt))
+      if (F.getParamAttributes(Idx).hasAttribute(Attributes::ZExt))
         Flags.setZExt();
-      if (F.paramHasAttr(Idx, Attribute::SExt))
+      if (F.getParamAttributes(Idx).hasAttribute(Attributes::SExt))
         Flags.setSExt();
-      if (F.paramHasAttr(Idx, Attribute::InReg))
+      if (F.getParamAttributes(Idx).hasAttribute(Attributes::InReg))
         Flags.setInReg();
-      if (F.paramHasAttr(Idx, Attribute::StructRet))
+      if (F.getParamAttributes(Idx).hasAttribute(Attributes::StructRet))
         Flags.setSRet();
-      if (F.paramHasAttr(Idx, Attribute::ByVal)) {
+      if (F.getParamAttributes(Idx).hasAttribute(Attributes::ByVal)) {
         Flags.setByVal();
         PointerType *Ty = cast<PointerType>(I->getType());
         Type *ElementTy = Ty->getElementType();
@@ -6632,14 +6745,15 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
           FrameAlign = TLI.getByValTypeAlignment(ElementTy);
         Flags.setByValAlign(FrameAlign);
       }
-      if (F.paramHasAttr(Idx, Attribute::Nest))
+      if (F.getParamAttributes(Idx).hasAttribute(Attributes::Nest))
         Flags.setNest();
       Flags.setOrigAlign(OriginalAlignment);
 
       EVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
       unsigned NumRegs = TLI.getNumRegisters(*CurDAG->getContext(), VT);
       for (unsigned i = 0; i != NumRegs; ++i) {
-        ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed);
+        ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed,
+                              Idx-1, i*RegisterVT.getStoreSize());
         if (NumRegs > 1 && i == 0)
           MyFlags.Flags.setSplit();
         // if it isn't first piece, alignment must be 1
@@ -6685,7 +6799,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
     EVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
     ISD::NodeType AssertOp = ISD::DELETED_NODE;
     SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1,
-                                        RegVT, VT, AssertOp);
+                                        RegVT, VT, NULL, AssertOp);
 
     MachineFunction& MF = SDB->DAG.getMachineFunction();
     MachineRegisterInfo& RegInfo = MF.getRegInfo();
@@ -6719,14 +6833,14 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
 
       if (!I->use_empty()) {
         ISD::NodeType AssertOp = ISD::DELETED_NODE;
-        if (F.paramHasAttr(Idx, Attribute::SExt))
+        if (F.getParamAttributes(Idx).hasAttribute(Attributes::SExt))
           AssertOp = ISD::AssertSext;
-        else if (F.paramHasAttr(Idx, Attribute::ZExt))
+        else if (F.getParamAttributes(Idx).hasAttribute(Attributes::ZExt))
           AssertOp = ISD::AssertZext;
 
         ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i],
                                              NumParts, PartVT, VT,
-                                             AssertOp));
+                                             NULL, AssertOp));
       }
 
       i += NumParts;
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 4090002..9e46d96 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -66,7 +66,7 @@ class ShuffleVectorInst;
 class SIToFPInst;
 class StoreInst;
 class SwitchInst;
-class TargetData;
+class DataLayout;
 class TargetLibraryInfo;
 class TargetLowering;
 class TruncInst;
@@ -150,9 +150,11 @@ private:
     uint64_t Mask;
     MachineBasicBlock* BB;
     unsigned Bits;
+    uint32_t ExtraWeight;
 
-    CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits):
-      Mask(mask), BB(bb), Bits(bits) { }
+    CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits,
+             uint32_t Weight):
+      Mask(mask), BB(bb), Bits(bits), ExtraWeight(Weight) { }
   };
 
   typedef std::vector<Case>           CaseVector;
@@ -247,11 +249,13 @@ private:
   typedef std::pair<JumpTableHeader, JumpTable> JumpTableBlock;
 
   struct BitTestCase {
-    BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr):
-      Mask(M), ThisBB(T), TargetBB(Tr) { }
+    BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr,
+                uint32_t Weight):
+      Mask(M), ThisBB(T), TargetBB(Tr), ExtraWeight(Weight) { }
     uint64_t Mask;
     MachineBasicBlock *ThisBB;
     MachineBasicBlock *TargetBB;
+    uint32_t ExtraWeight;
   };
 
   typedef SmallVector<BitTestCase, 3> BitTestInfo;
@@ -281,7 +285,7 @@ public:
   const TargetMachine &TM;
   const TargetLowering &TLI;
   SelectionDAG &DAG;
-  const TargetData *TD;
+  const DataLayout *TD;
   AliasAnalysis *AA;
   const TargetLibraryInfo *LibInfo;
 
@@ -325,7 +329,7 @@ public:
                       CodeGenOpt::Level ol)
     : SDNodeOrder(0), TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()),
       DAG(dag), FuncInfo(funcinfo), OptLevel(ol),
-      HasTailCall(false), Context(dag.getContext()) {
+      HasTailCall(false) {
   }
 
   void init(GCFunctionInfo *gfi, AliasAnalysis &aa,
@@ -452,6 +456,7 @@ public:
   void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB);
   void visitBitTestCase(BitTestBlock &BB,
                         MachineBasicBlock* NextMBB,
+                        uint32_t BranchWeightToNext,
                         unsigned Reg,
                         BitTestCase &B,
                         MachineBasicBlock *SwitchBB);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 13cd011..6f3ce7a 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -267,6 +267,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
   case ISD::STACKRESTORE:               return "stackrestore";
   case ISD::TRAP:                       return "trap";
   case ISD::DEBUGTRAP:                  return "debugtrap";
+  case ISD::LIFETIME_START:             return "lifetime.start";
+  case ISD::LIFETIME_END:               return "lifetime.end";
 
   // Bit manipulation
   case ISD::BSWAP:                      return "bswap";
@@ -331,7 +333,7 @@ void SDNode::dump(const SelectionDAG *G) const {
 }
 
 void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
-  OS << (void*)this << ": ";
+  OS << (const void*)this << ": ";
 
   for (unsigned i = 0, e = getNumValues(); i != e; ++i) {
     if (i) OS << ",";
@@ -473,11 +475,16 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
     OS << "<" << *M->getMemOperand() << ">";
   } else if (const BlockAddressSDNode *BA =
                dyn_cast<BlockAddressSDNode>(this)) {
+    int64_t offset = BA->getOffset();
     OS << "<";
     WriteAsOperand(OS, BA->getBlockAddress()->getFunction(), false);
     OS << ", ";
     WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false);
     OS << ">";
+    if (offset > 0)
+      OS << " + " << offset;
+    else
+      OS << " " << offset;
     if (unsigned int TF = BA->getTargetFlags())
       OS << " [TF=" << TF << ']';
   }
@@ -559,7 +566,7 @@ static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
       child->printr(OS, G);
       once.insert(child);
     } else {         // Just the address. FIXME: also print the child's opcode.
-      OS << (void*)child;
+      OS << (const void*)child;
       if (unsigned RN = N->getOperand(i).getResNo())
         OS << ":" << RN;
     }
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 4e5e3ba..c314fa5 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -474,6 +474,11 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
     MRI.replaceRegWith(From, To);
   }
 
+  // Freeze the set of reserved registers now that MachineFrameInfo has been
+  // set up. All the information required by getReservedRegs() should be
+  // available now.
+  MRI.freezeReservedRegs(*MF);
+
   // Release function-specific state. SDB and CurDAG are already cleared
   // at this point.
   FuncInfo->clear();
@@ -554,7 +559,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
 #endif
   {
     BlockNumber = FuncInfo->MBB->getNumber();
-    BlockName = MF->getFunction()->getName().str() + ":" +
+    BlockName = MF->getName().str() + ":" +
                 FuncInfo->MBB->getBasicBlock()->getName().str();
   }
   DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber
@@ -1209,7 +1214,12 @@ SelectionDAGISel::FinishBasicBlock() {
       CodeGenAndEmitDAG();
     }
 
+    uint32_t UnhandledWeight = 0;
+    for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j)
+      UnhandledWeight += SDB->BitTestCases[i].Cases[j].ExtraWeight;
+
     for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) {
+      UnhandledWeight -= SDB->BitTestCases[i].Cases[j].ExtraWeight;
       // Set the current basic block to the mbb we wish to insert the code into
       FuncInfo->MBB = SDB->BitTestCases[i].Cases[j].ThisBB;
       FuncInfo->InsertPt = FuncInfo->MBB->end();
@@ -1217,12 +1227,14 @@ SelectionDAGISel::FinishBasicBlock() {
       if (j+1 != ej)
         SDB->visitBitTestCase(SDB->BitTestCases[i],
                               SDB->BitTestCases[i].Cases[j+1].ThisBB,
+                              UnhandledWeight,
                               SDB->BitTestCases[i].Reg,
                               SDB->BitTestCases[i].Cases[j],
                               FuncInfo->MBB);
       else
         SDB->visitBitTestCase(SDB->BitTestCases[i],
                               SDB->BitTestCases[i].Default,
+                              UnhandledWeight,
                               SDB->BitTestCases[i].Reg,
                               SDB->BitTestCases[i].Cases[j],
                               FuncInfo->MBB);
@@ -1794,10 +1806,13 @@ WalkChainUsers(const SDNode *ChainedNode,
         User->getOpcode() == ISD::HANDLENODE)  // Root of the graph.
       continue;
 
-    if (User->getOpcode() == ISD::CopyToReg ||
-        User->getOpcode() == ISD::CopyFromReg ||
-        User->getOpcode() == ISD::INLINEASM ||
-        User->getOpcode() == ISD::EH_LABEL) {
+    unsigned UserOpcode = User->getOpcode();
+    if (UserOpcode == ISD::CopyToReg ||
+        UserOpcode == ISD::CopyFromReg ||
+        UserOpcode == ISD::INLINEASM ||
+        UserOpcode == ISD::EH_LABEL ||
+        UserOpcode == ISD::LIFETIME_START ||
+        UserOpcode == ISD::LIFETIME_END) {
       // If their node ID got reset to -1 then they've already been selected.
       // Treat them like a MachineOpcode.
       if (User->getNodeId() == -1)
@@ -1994,7 +2009,7 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
   return Res;
 }
 
-/// CheckPatternPredicate - Implements OP_CheckPatternPredicate.
+/// CheckSame - Implements OP_CheckSame.
 LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
 CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
           SDValue N,
@@ -2213,6 +2228,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
   case ISD::CopyFromReg:
   case ISD::CopyToReg:
   case ISD::EH_LABEL:
+  case ISD::LIFETIME_START:
+  case ISD::LIFETIME_END:
     NodeToMatch->setNodeId(-1); // Mark selected.
     return 0;
   case ISD::AssertSext:
@@ -2981,7 +2998,7 @@ void SelectionDAGISel::CannotYetSelect(SDNode *N) {
       N->getOpcode() != ISD::INTRINSIC_WO_CHAIN &&
       N->getOpcode() != ISD::INTRINSIC_VOID) {
     N->printrFull(Msg, CurDAG);
-    Msg << "\nIn function: " << MF->getFunction()->getName();
+    Msg << "\nIn function: " << MF->getName();
   } else {
     bool HasInputChain = N->getOperand(0).getValueType() == MVT::Other;
     unsigned iid =
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index 173ffac..3921635 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -14,7 +14,6 @@
 #include "ScheduleDAGSDNodes.h"
 #include "llvm/Constants.h"
 #include "llvm/DebugInfo.h"
-#include "llvm/Function.h"
 #include "llvm/Assembly/Writer.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
@@ -50,7 +49,7 @@ namespace llvm {
 
     template<typename EdgeIter>
     static std::string getEdgeSourceLabel(const void *Node, EdgeIter I) {
-      return itostr(I - SDNodeIterator::begin((SDNode *) Node));
+      return itostr(I - SDNodeIterator::begin((const SDNode *) Node));
     }
 
     /// edgeTargetsEdgeSource - This method returns true if this outgoing edge
@@ -73,7 +72,7 @@ namespace llvm {
     }
 
     static std::string getGraphName(const SelectionDAG *G) {
-      return G->getMachineFunction().getFunction()->getName();
+      return G->getMachineFunction().getName();
     }
 
     static bool renderGraphFromBottomUp() {
@@ -146,7 +145,7 @@ std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node,
 void SelectionDAG::viewGraph(const std::string &Title) {
 // This code is only for debugging!
 #ifndef NDEBUG
-  ViewGraph(this, "dag." + getMachineFunction().getFunction()->getName(),
+  ViewGraph(this, "dag." + getMachineFunction().getName(),
             false, Title);
 #else
   errs() << "SelectionDAG::viewGraph is only available in debug builds on "
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 6820175..49f55e2 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -14,7 +14,7 @@
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCExpr.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
@@ -515,7 +515,7 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
 /// NOTE: The constructor takes ownership of TLOF.
 TargetLowering::TargetLowering(const TargetMachine &tm,
                                const TargetLoweringObjectFile *tlof)
-  : TM(tm), TD(TM.getTargetData()), TLOF(*tlof) {
+  : TM(tm), TD(TM.getDataLayout()), TLOF(*tlof) {
   // All operations default to being supported.
   memset(OpActions, 0, sizeof(OpActions));
   memset(LoadExtActions, 0, sizeof(LoadExtActions));
@@ -583,8 +583,13 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
   // Default ISD::TRAP to expand (which turns it into abort).
   setOperationAction(ISD::TRAP, MVT::Other, Expand);
 
+  // On most systems, DEBUGTRAP and TRAP have no difference. The "Expand"
+  // here is to inform DAG Legalizer to replace DEBUGTRAP with TRAP.
+  //
+  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand);
+
   IsLittleEndian = TD->isLittleEndian();
-  PointerTy = MVT::getIntegerVT(8*TD->getPointerSize());
+  PointerTy = MVT::getIntegerVT(8*TD->getPointerSize(0));
   memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*));
   memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray));
   maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8;
@@ -613,6 +618,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
   ShouldFoldAtomicFences = false;
   InsertFencesForAtomic = false;
   SupportJumpTables = true;
+  MinimumJumpTableEntries = 4;
 
   InitLibcallNames(LibcallRoutineNames);
   InitCmpLibcallCCs(CmpLibcallCCs);
@@ -624,7 +630,7 @@ TargetLowering::~TargetLowering() {
 }
 
 MVT TargetLowering::getShiftAmountTy(EVT LHSTy) const {
-  return MVT::getIntegerVT(8*TD->getPointerSize());
+  return MVT::getIntegerVT(8*TD->getPointerSize(0));
 }
 
 /// canOpTrap - Returns true if the operation can trap for the value type.
@@ -772,7 +778,7 @@ void TargetLowering::computeRegisterProperties() {
       LegalIntReg = IntReg;
     } else {
       RegisterTypeForVT[IntReg] = TransformToType[IntReg] =
-        (MVT::SimpleValueType)LegalIntReg;
+        (const MVT::SimpleValueType)LegalIntReg;
       ValueTypeActions.setTypeAction(IVT, TypePromoteInteger);
     }
   }
@@ -898,10 +904,9 @@ const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
   return NULL;
 }
 
-
 EVT TargetLowering::getSetCCResultType(EVT VT) const {
   assert(!VT.isVector() && "No default SetCC type for vectors!");
-  return PointerTy.SimpleTy;
+  return getPointerTy(0).SimpleTy;
 }
 
 MVT::SimpleValueType TargetLowering::getCmpLibcallReturnType() const {
@@ -997,9 +1002,9 @@ void llvm::GetReturnInfo(Type* ReturnType, Attributes attr,
     EVT VT = ValueVTs[j];
     ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
 
-    if (attr & Attribute::SExt)
+    if (attr.hasAttribute(Attributes::SExt))
       ExtendKind = ISD::SIGN_EXTEND;
-    else if (attr & Attribute::ZExt)
+    else if (attr.hasAttribute(Attributes::ZExt))
       ExtendKind = ISD::ZERO_EXTEND;
 
     // FIXME: C calling convention requires the return type to be promoted to
@@ -1017,18 +1022,17 @@ void llvm::GetReturnInfo(Type* ReturnType, Attributes attr,
 
     // 'inreg' on function refers to return value
     ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
-    if (attr & Attribute::InReg)
+    if (attr.hasAttribute(Attributes::InReg))
       Flags.setInReg();
 
     // Propagate extension type if any
-    if (attr & Attribute::SExt)
+    if (attr.hasAttribute(Attributes::SExt))
       Flags.setSExt();
-    else if (attr & Attribute::ZExt)
+    else if (attr.hasAttribute(Attributes::ZExt))
       Flags.setZExt();
 
-    for (unsigned i = 0; i < NumParts; ++i) {
-      Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true));
-    }
+    for (unsigned i = 0; i < NumParts; ++i)
+      Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true, 0, 0));
   }
 }
 
@@ -1062,7 +1066,7 @@ SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
 
   if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
       (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
-    return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy());
+    return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(0));
 
   return Table;
 }
@@ -2441,7 +2445,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
   if (N0 == N1) {
     // The sext(setcc()) => setcc() optimization relies on the appropriate
     // constant being emitted.
-    uint64_t EqVal;
+    uint64_t EqVal = 0;
     switch (getBooleanContents(N0.getValueType().isVector())) {
     case UndefinedBooleanContent:
     case ZeroOrOneBooleanContent:
@@ -2954,8 +2958,9 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
               EVT::getEVT(IntegerType::get(OpTy->getContext(), BitSize), true);
           break;
         }
-      } else if (dyn_cast<PointerType>(OpTy)) {
-        OpInfo.ConstraintVT = MVT::getIntegerVT(8*TD->getPointerSize());
+      } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) {
+        OpInfo.ConstraintVT = MVT::getIntegerVT(
+            8*TD->getPointerSize(PT->getAddressSpace()));
       } else {
         OpInfo.ConstraintVT = EVT::getEVT(OpTy, true);
       }
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
index a081e3c..f769b44 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
@@ -16,7 +16,7 @@
 using namespace llvm;
 
 TargetSelectionDAGInfo::TargetSelectionDAGInfo(const TargetMachine &TM)
-  : TD(TM.getTargetData()) {
+  : TD(TM.getDataLayout()) {
 }
 
 TargetSelectionDAGInfo::~TargetSelectionDAGInfo() {
diff --git a/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp b/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp
index 21ae2f5..4fbe1b3 100644
--- a/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp
+++ b/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp
@@ -159,7 +159,7 @@ void PEI::initShrinkWrappingInfo() {
   // via --shrink-wrap-func=<funcname>.
 #ifndef NDEBUG
   if (ShrinkWrapFunc != "") {
-    std::string MFName = MF->getFunction()->getName().str();
+    std::string MFName = MF->getName().str();
     ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc);
   }
 #endif
@@ -187,7 +187,7 @@ void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) {
 
   DEBUG(if (ShrinkWrapThisFunction) {
       dbgs() << "Place CSR spills/restores for "
-             << MF->getFunction()->getName() << "\n";
+             << MF->getName() << "\n";
     });
 
   if (calculateSets(Fn))
@@ -364,7 +364,7 @@ bool PEI::calculateSets(MachineFunction &Fn) {
   // If no CSRs used, we are done.
   if (CSI.empty()) {
     DEBUG(if (ShrinkWrapThisFunction)
-            dbgs() << "DISABLED: " << Fn.getFunction()->getName()
+            dbgs() << "DISABLED: " << Fn.getName()
                    << ": uses no callee-saved registers\n");
     return false;
   }
@@ -384,7 +384,7 @@ bool PEI::calculateSets(MachineFunction &Fn) {
   // implementation to functions with <= 500 MBBs.
   if (Fn.size() > 500) {
     DEBUG(if (ShrinkWrapThisFunction)
-            dbgs() << "DISABLED: " << Fn.getFunction()->getName()
+            dbgs() << "DISABLED: " << Fn.getName()
                    << ": too large (" << Fn.size() << " MBBs)\n");
     ShrinkWrapThisFunction = false;
   }
@@ -466,7 +466,7 @@ bool PEI::calculateSets(MachineFunction &Fn) {
   }
 
   if (allCSRUsesInEntryBlock) {
-    DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName()
+    DEBUG(dbgs() << "DISABLED: " << Fn.getName()
                  << ": all CSRs used in EntryBlock\n");
     ShrinkWrapThisFunction = false;
   } else {
@@ -478,7 +478,7 @@ bool PEI::calculateSets(MachineFunction &Fn) {
         allCSRsUsedInEntryFanout = false;
     }
     if (allCSRsUsedInEntryFanout) {
-      DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName()
+      DEBUG(dbgs() << "DISABLED: " << Fn.getName()
                    << ": all CSRs used in imm successors of EntryBlock\n");
       ShrinkWrapThisFunction = false;
     }
@@ -505,7 +505,7 @@ bool PEI::calculateSets(MachineFunction &Fn) {
       if (dominatesExitNodes) {
         CSRUsedInChokePoints |= CSRUsed[MBB];
         if (CSRUsedInChokePoints == UsedCSRegs) {
-          DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName()
+          DEBUG(dbgs() << "DISABLED: " << Fn.getName()
                        << ": all CSRs used in choke point(s) at "
                        << getBasicBlockName(MBB) << "\n");
           ShrinkWrapThisFunction = false;
@@ -521,7 +521,7 @@ bool PEI::calculateSets(MachineFunction &Fn) {
     return false;
 
   DEBUG({
-      dbgs() << "ENABLED: " << Fn.getFunction()->getName();
+      dbgs() << "ENABLED: " << Fn.getName();
       if (HasFastExitPath)
         dbgs() << " (fast exit path)";
       dbgs() << "\n";
@@ -861,7 +861,7 @@ void PEI::placeSpillsAndRestores(MachineFunction &Fn) {
   DEBUG(if (ShrinkWrapDebugging >= BasicInfo) {
       dbgs() << "-----------------------------------------------------------\n";
       dbgs() << "total iterations = " << iterations << " ( "
-           << Fn.getFunction()->getName()
+           << Fn.getName()
            << " " << numSRReducedThisFunc
            << " " << Fn.size()
            << " )\n";
@@ -984,7 +984,7 @@ void PEI::verifySpillRestorePlacement() {
       if (isReturnBlock(SBB) || SBB->succ_size() == 0) {
         if (restored != spilled) {
           CSRegSet notRestored = (spilled - restored);
-          DEBUG(dbgs() << MF->getFunction()->getName() << ": "
+          DEBUG(dbgs() << MF->getName() << ": "
                        << stringifyCSRegSet(notRestored)
                        << " spilled at " << getBasicBlockName(MBB)
                        << " are never restored on path to return "
@@ -1032,7 +1032,7 @@ void PEI::verifySpillRestorePlacement() {
     }
     if (spilled != restored) {
       CSRegSet notSpilled = (restored - spilled);
-      DEBUG(dbgs() << MF->getFunction()->getName() << ": "
+      DEBUG(dbgs() << MF->getName() << ": "
                    << stringifyCSRegSet(notSpilled)
                    << " restored at " << getBasicBlockName(MBB)
                    << " are never spilled\n");
diff --git a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
index 980bd74..4b566fc 100644
--- a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
@@ -30,7 +30,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -191,58 +191,43 @@ setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) {
   // that needs to be restored on all exits from the function. This is an alloca
   // because the value needs to be added to the global context list.
   unsigned Align =
-    TLI->getTargetData()->getPrefTypeAlignment(FunctionContextTy);
+    TLI->getDataLayout()->getPrefTypeAlignment(FunctionContextTy);
   FuncCtx =
     new AllocaInst(FunctionContextTy, 0, Align, "fn_context", EntryBB->begin());
 
   // Fill in the function context structure.
-  Type *Int32Ty = Type::getInt32Ty(F.getContext());
-  Value *Zero = ConstantInt::get(Int32Ty, 0);
-  Value *One = ConstantInt::get(Int32Ty, 1);
-  Value *Two = ConstantInt::get(Int32Ty, 2);
-  Value *Three = ConstantInt::get(Int32Ty, 3);
-  Value *Four = ConstantInt::get(Int32Ty, 4);
-
-  Value *Idxs[2] = { Zero, 0 };
-
   for (unsigned I = 0, E = LPads.size(); I != E; ++I) {
     LandingPadInst *LPI = LPads[I];
     IRBuilder<> Builder(LPI->getParent()->getFirstInsertionPt());
 
     // Reference the __data field.
-    Idxs[1] = Two;
-    Value *FCData = Builder.CreateGEP(FuncCtx, Idxs, "__data");
+    Value *FCData = Builder.CreateConstGEP2_32(FuncCtx, 0, 2, "__data");
 
     // The exception values come back in context->__data[0].
-    Idxs[1] = Zero;
-    Value *ExceptionAddr = Builder.CreateGEP(FCData, Idxs, "exception_gep");
+    Value *ExceptionAddr = Builder.CreateConstGEP2_32(FCData, 0, 0,
+                                                      "exception_gep");
     Value *ExnVal = Builder.CreateLoad(ExceptionAddr, true, "exn_val");
-    ExnVal = Builder.CreateIntToPtr(ExnVal, Type::getInt8PtrTy(F.getContext()));
+    ExnVal = Builder.CreateIntToPtr(ExnVal, Builder.getInt8PtrTy());
 
-    Idxs[1] = One;
-    Value *SelectorAddr = Builder.CreateGEP(FCData, Idxs, "exn_selector_gep");
+    Value *SelectorAddr = Builder.CreateConstGEP2_32(FCData, 0, 1,
+                                                     "exn_selector_gep");
     Value *SelVal = Builder.CreateLoad(SelectorAddr, true, "exn_selector_val");
 
     substituteLPadValues(LPI, ExnVal, SelVal);
   }
 
   // Personality function
-  Idxs[1] = Three;
+  IRBuilder<> Builder(EntryBB->getTerminator());
   if (!PersonalityFn)
     PersonalityFn = LPads[0]->getPersonalityFn();
-  Value *PersonalityFieldPtr =
-    GetElementPtrInst::Create(FuncCtx, Idxs, "pers_fn_gep",
-                              EntryBB->getTerminator());
-  new StoreInst(PersonalityFn, PersonalityFieldPtr, true,
-                EntryBB->getTerminator());
+  Value *PersonalityFieldPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 3,
+                                                          "pers_fn_gep");
+  Builder.CreateStore(PersonalityFn, PersonalityFieldPtr, /*isVolatile=*/true);
 
   // LSDA address
-  Value *LSDA = CallInst::Create(LSDAAddrFn, "lsda_addr",
-                                 EntryBB->getTerminator());
-  Idxs[1] = Four;
-  Value *LSDAFieldPtr = GetElementPtrInst::Create(FuncCtx, Idxs, "lsda_gep",
-                                                  EntryBB->getTerminator());
-  new StoreInst(LSDA, LSDAFieldPtr, true, EntryBB->getTerminator());
+  Value *LSDA = Builder.CreateCall(LSDAAddrFn, "lsda_addr");
+  Value *LSDAFieldPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 4, "lsda_gep");
+  Builder.CreateStore(LSDA, LSDAFieldPtr, /*isVolatile=*/true);
 
   return FuncCtx;
 }
@@ -417,48 +402,31 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
   Value *FuncCtx =
     setupFunctionContext(F, makeArrayRef(LPads.begin(), LPads.end()));
   BasicBlock *EntryBB = F.begin();
-  Type *Int32Ty = Type::getInt32Ty(F.getContext());
-
-  Value *Idxs[2] = {
-    ConstantInt::get(Int32Ty, 0), 0
-  };
+  IRBuilder<> Builder(EntryBB->getTerminator());
 
   // Get a reference to the jump buffer.
-  Idxs[1] = ConstantInt::get(Int32Ty, 5);
-  Value *JBufPtr = GetElementPtrInst::Create(FuncCtx, Idxs, "jbuf_gep",
-                                             EntryBB->getTerminator());
+  Value *JBufPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 5, "jbuf_gep");
 
   // Save the frame pointer.
-  Idxs[1] = ConstantInt::get(Int32Ty, 0);
-  Value *FramePtr = GetElementPtrInst::Create(JBufPtr, Idxs, "jbuf_fp_gep",
-                                              EntryBB->getTerminator());
+  Value *FramePtr = Builder.CreateConstGEP2_32(JBufPtr, 0, 0, "jbuf_fp_gep");
 
-  Value *Val = CallInst::Create(FrameAddrFn,
-                                ConstantInt::get(Int32Ty, 0),
-                                "fp",
-                                EntryBB->getTerminator());
-  new StoreInst(Val, FramePtr, true, EntryBB->getTerminator());
+  Value *Val = Builder.CreateCall(FrameAddrFn, Builder.getInt32(0), "fp");
+  Builder.CreateStore(Val, FramePtr, /*isVolatile=*/true);
 
   // Save the stack pointer.
-  Idxs[1] = ConstantInt::get(Int32Ty, 2);
-  Value *StackPtr = GetElementPtrInst::Create(JBufPtr, Idxs, "jbuf_sp_gep",
-                                              EntryBB->getTerminator());
+  Value *StackPtr = Builder.CreateConstGEP2_32(JBufPtr, 0, 2, "jbuf_sp_gep");
 
-  Val = CallInst::Create(StackAddrFn, "sp", EntryBB->getTerminator());
-  new StoreInst(Val, StackPtr, true, EntryBB->getTerminator());
+  Val = Builder.CreateCall(StackAddrFn, "sp");
+  Builder.CreateStore(Val, StackPtr, /*isVolatile=*/true);
 
   // Call the setjmp instrinsic. It fills in the rest of the jmpbuf.
-  Value *SetjmpArg = CastInst::Create(Instruction::BitCast, JBufPtr,
-                                      Type::getInt8PtrTy(F.getContext()), "",
-                                      EntryBB->getTerminator());
-  CallInst::Create(BuiltinSetjmpFn, SetjmpArg, "", EntryBB->getTerminator());
+  Value *SetjmpArg = Builder.CreateBitCast(JBufPtr, Builder.getInt8PtrTy());
+  Builder.CreateCall(BuiltinSetjmpFn, SetjmpArg);
 
   // Store a pointer to the function context so that the back-end will know
   // where to look for it.
-  Value *FuncCtxArg = CastInst::Create(Instruction::BitCast, FuncCtx,
-                                       Type::getInt8PtrTy(F.getContext()), "",
-                                       EntryBB->getTerminator());
-  CallInst::Create(FuncCtxFn, FuncCtxArg, "", EntryBB->getTerminator());
+  Value *FuncCtxArg = Builder.CreateBitCast(FuncCtx, Builder.getInt8PtrTy());
+  Builder.CreateCall(FuncCtxFn, FuncCtxArg);
 
   // At this point, we are all set up, update the invoke instructions to mark
   // their call_site values.
diff --git a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
index c8c3fb3..95faafab 100644
--- a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
+++ b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
@@ -143,6 +143,7 @@ void SlotIndexes::renumberIndexes(IndexList::iterator curItr) {
 }
 
 
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void SlotIndexes::dump() const {
   for (IndexList::const_iterator itr = indexList.begin();
        itr != indexList.end(); ++itr) {
@@ -159,6 +160,7 @@ void SlotIndexes::dump() const {
     dbgs() << "BB#" << i << "\t[" << MBBRanges[i].first << ';'
            << MBBRanges[i].second << ")\n";
 }
+#endif
 
 // Print a SlotIndex to a raw_ostream.
 void SlotIndex::print(raw_ostream &os) const {
@@ -168,9 +170,11 @@ void SlotIndex::print(raw_ostream &os) const {
     os << "invalid";
 }
 
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 // Dump a SlotIndex to stderr.
 void SlotIndex::dump() const {
   print(dbgs());
   dbgs() << "\n";
 }
+#endif
 
diff --git a/contrib/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm/lib/CodeGen/SplitKit.cpp
index 4a2b7ec..dca15ee 100644
--- a/contrib/llvm/lib/CodeGen/SplitKit.cpp
+++ b/contrib/llvm/lib/CodeGen/SplitKit.cpp
@@ -356,6 +356,7 @@ void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) {
   Edit->anyRematerializable(0);
 }
 
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void SplitEditor::dump() const {
   if (RegAssign.empty()) {
     dbgs() << " empty\n";
@@ -366,6 +367,7 @@ void SplitEditor::dump() const {
     dbgs() << " [" << I.start() << ';' << I.stop() << "):" << I.value();
   dbgs() << '\n';
 }
+#endif
 
 VNInfo *SplitEditor::defValue(unsigned RegIdx,
                               const VNInfo *ParentVNI,
diff --git a/contrib/llvm/lib/CodeGen/StackColoring.cpp b/contrib/llvm/lib/CodeGen/StackColoring.cpp
new file mode 100644
index 0000000..1cbee84
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/StackColoring.cpp
@@ -0,0 +1,783 @@
+//===-- StackColoring.cpp -------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements the stack-coloring optimization that looks for
+// lifetime markers machine instructions (LIFESTART_BEGIN and LIFESTART_END),
+// which represent the possible lifetime of stack slots. It attempts to
+// merge disjoint stack slots and reduce the used stack space.
+// NOTE: This pass is not StackSlotColoring, which optimizes spill slots.
+//
+// TODO: In the future we plan to improve stack coloring in the following ways:
+// 1. Allow merging multiple small slots into a single larger slot at different
+//    offsets.
+// 2. Merge this pass with StackSlotColoring and allow merging of allocas with
+//    spill slots.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "stackcoloring"
+#include "MachineTraceMetrics.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SparseSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/Instructions.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+DisableColoring("no-stack-coloring",
+        cl::init(false), cl::Hidden,
+        cl::desc("Disable stack coloring"));
+
+/// The user may write code that uses allocas outside of the declared lifetime
+/// zone. This can happen when the user returns a reference to a local
+/// data-structure. We can detect these cases and decide not to optimize the
+/// code. If this flag is enabled, we try to save the user.
+static cl::opt<bool>
+ProtectFromEscapedAllocas("protect-from-escaped-allocas",
+        cl::init(false), cl::Hidden,
+        cl::desc("Do not optimize lifetime zones that are broken"));
+
+STATISTIC(NumMarkerSeen,  "Number of lifetime markers found.");
+STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots.");
+STATISTIC(StackSlotMerged, "Number of stack slot merged.");
+STATISTIC(EscapedAllocas,
+          "Number of allocas that escaped the lifetime region");
+
+//===----------------------------------------------------------------------===//
+//                           StackColoring Pass
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// StackColoring - A machine pass for merging disjoint stack allocations,
+/// marked by the LIFETIME_START and LIFETIME_END pseudo instructions.
+class StackColoring : public MachineFunctionPass {
+  MachineFrameInfo *MFI;
+  MachineFunction *MF;
+
+  /// A class representing liveness information for a single basic block.
+  /// Each bit in the BitVector represents the liveness property
+  /// for a different stack slot.
+  struct BlockLifetimeInfo {
+    /// Which slots BEGINs in each basic block.
+    BitVector Begin;
+    /// Which slots ENDs in each basic block.
+    BitVector End;
+    /// Which slots are marked as LIVE_IN, coming into each basic block.
+    BitVector LiveIn;
+    /// Which slots are marked as LIVE_OUT, coming out of each basic block.
+    BitVector LiveOut;
+  };
+
+  /// Maps active slots (per bit) for each basic block.
+  DenseMap<MachineBasicBlock*, BlockLifetimeInfo> BlockLiveness;
+
+  /// Maps serial numbers to basic blocks.
+  DenseMap<MachineBasicBlock*, int> BasicBlocks;
+  /// Maps basic blocks to a serial number.
+  SmallVector<MachineBasicBlock*, 8> BasicBlockNumbering;
+
+  /// Maps liveness intervals for each slot.
+  SmallVector<LiveInterval*, 16> Intervals;
+  /// VNInfo is used for the construction of LiveIntervals.
+  VNInfo::Allocator VNInfoAllocator;
+  /// SlotIndex analysis object.
+  SlotIndexes *Indexes;
+
+  /// The list of lifetime markers found. These markers are to be removed
+  /// once the coloring is done.
+  SmallVector<MachineInstr*, 8> Markers;
+
+  /// SlotSizeSorter - A Sort utility for arranging stack slots according
+  /// to their size.
+  struct SlotSizeSorter {
+    MachineFrameInfo *MFI;
+    SlotSizeSorter(MachineFrameInfo *mfi) : MFI(mfi) { }
+    bool operator()(int LHS, int RHS) {
+      // We use -1 to denote a uninteresting slot. Place these slots at the end.
+      if (LHS == -1) return false;
+      if (RHS == -1) return true;
+      // Sort according to size.
+      return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS);
+  }
+};
+
+public:
+  static char ID;
+  StackColoring() : MachineFunctionPass(ID) {
+    initializeStackColoringPass(*PassRegistry::getPassRegistry());
+  }
+  void getAnalysisUsage(AnalysisUsage &AU) const;
+  bool runOnMachineFunction(MachineFunction &MF);
+
+private:
+  /// Debug.
+  void dump();
+
+  /// Removes all of the lifetime marker instructions from the function.
+  /// \returns true if any markers were removed.
+  bool removeAllMarkers();
+
+  /// Scan the machine function and find all of the lifetime markers.
+  /// Record the findings in the BEGIN and END vectors.
+  /// \returns the number of markers found.
+  unsigned collectMarkers(unsigned NumSlot);
+
+  /// Perform the dataflow calculation and calculate the lifetime for each of
+  /// the slots, based on the BEGIN/END vectors. Set the LifetimeLIVE_IN and
+  /// LifetimeLIVE_OUT maps that represent which stack slots are live coming
+  /// in and out blocks.
+  void calculateLocalLiveness();
+
+  /// Construct the LiveIntervals for the slots.
+  void calculateLiveIntervals(unsigned NumSlots);
+
+  /// Go over the machine function and change instructions which use stack
+  /// slots to use the joint slots.
+  void remapInstructions(DenseMap<int, int> &SlotRemap);
+
+  /// The input program may contain intructions which are not inside lifetime
+  /// markers. This can happen due to a bug in the compiler or due to a bug in
+  /// user code (for example, returning a reference to a local variable).
+  /// This procedure checks all of the instructions in the function and
+  /// invalidates lifetime ranges which do not contain all of the instructions
+  /// which access that frame slot.
+  void removeInvalidSlotRanges();
+
+  /// Map entries which point to other entries to their destination.
+  ///   A->B->C becomes A->C.
+   void expungeSlotMap(DenseMap<int, int> &SlotRemap, unsigned NumSlots);
+};
+} // end anonymous namespace
+
+char StackColoring::ID = 0;
+char &llvm::StackColoringID = StackColoring::ID;
+
+INITIALIZE_PASS_BEGIN(StackColoring,
+                   "stack-coloring", "Merge disjoint stack slots", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_END(StackColoring,
+                   "stack-coloring", "Merge disjoint stack slots", false, false)
+
+void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<MachineDominatorTree>();
+  AU.addPreserved<MachineDominatorTree>();
+  AU.addRequired<SlotIndexes>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void StackColoring::dump() {
+  for (df_iterator<MachineFunction*> FI = df_begin(MF), FE = df_end(MF);
+       FI != FE; ++FI) {
+    unsigned Num = BasicBlocks[*FI];
+    DEBUG(dbgs()<<"Inspecting block #"<<Num<<" ["<<FI->getName()<<"]\n");
+    Num = 0;
+    DEBUG(dbgs()<<"BEGIN  : {");
+    for (unsigned i=0; i < BlockLiveness[*FI].Begin.size(); ++i)
+      DEBUG(dbgs()<<BlockLiveness[*FI].Begin.test(i)<<" ");
+    DEBUG(dbgs()<<"}\n");
+
+    DEBUG(dbgs()<<"END    : {");
+    for (unsigned i=0; i < BlockLiveness[*FI].End.size(); ++i)
+      DEBUG(dbgs()<<BlockLiveness[*FI].End.test(i)<<" ");
+
+    DEBUG(dbgs()<<"}\n");
+
+    DEBUG(dbgs()<<"LIVE_IN: {");
+    for (unsigned i=0; i < BlockLiveness[*FI].LiveIn.size(); ++i)
+      DEBUG(dbgs()<<BlockLiveness[*FI].LiveIn.test(i)<<" ");
+
+    DEBUG(dbgs()<<"}\n");
+    DEBUG(dbgs()<<"LIVEOUT: {");
+    for (unsigned i=0; i < BlockLiveness[*FI].LiveOut.size(); ++i)
+      DEBUG(dbgs()<<BlockLiveness[*FI].LiveOut.test(i)<<" ");
+    DEBUG(dbgs()<<"}\n");
+  }
+}
+
+unsigned StackColoring::collectMarkers(unsigned NumSlot) {
+  unsigned MarkersFound = 0;
+  // Scan the function to find all lifetime markers.
+  // NOTE: We use the a reverse-post-order iteration to ensure that we obtain a
+  // deterministic numbering, and because we'll need a post-order iteration
+  // later for solving the liveness dataflow problem.
+  for (df_iterator<MachineFunction*> FI = df_begin(MF), FE = df_end(MF);
+       FI != FE; ++FI) {
+
+    // Assign a serial number to this basic block.
+    BasicBlocks[*FI] = BasicBlockNumbering.size();
+    BasicBlockNumbering.push_back(*FI);
+
+    BlockLiveness[*FI].Begin.resize(NumSlot);
+    BlockLiveness[*FI].End.resize(NumSlot);
+
+    for (MachineBasicBlock::iterator BI = (*FI)->begin(), BE = (*FI)->end();
+         BI != BE; ++BI) {
+
+      if (BI->getOpcode() != TargetOpcode::LIFETIME_START &&
+          BI->getOpcode() != TargetOpcode::LIFETIME_END)
+        continue;
+
+      Markers.push_back(BI);
+
+      bool IsStart = BI->getOpcode() == TargetOpcode::LIFETIME_START;
+      MachineOperand &MI = BI->getOperand(0);
+      unsigned Slot = MI.getIndex();
+
+      MarkersFound++;
+
+      const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);
+      if (Allocation) {
+        DEBUG(dbgs()<<"Found a lifetime marker for slot #"<<Slot<<
+              " with allocation: "<< Allocation->getName()<<"\n");
+      }
+
+      if (IsStart) {
+        BlockLiveness[*FI].Begin.set(Slot);
+      } else {
+        if (BlockLiveness[*FI].Begin.test(Slot)) {
+          // Allocas that start and end within a single block are handled
+          // specially when computing the LiveIntervals to avoid pessimizing
+          // the liveness propagation.
+          BlockLiveness[*FI].Begin.reset(Slot);
+        } else {
+          BlockLiveness[*FI].End.set(Slot);
+        }
+      }
+    }
+  }
+
+  // Update statistics.
+  NumMarkerSeen += MarkersFound;
+  return MarkersFound;
+}
+
+void StackColoring::calculateLocalLiveness() {
+  // Perform a standard reverse dataflow computation to solve for
+  // global liveness.  The BEGIN set here is equivalent to KILL in the standard
+  // formulation, and END is equivalent to GEN.  The result of this computation
+  // is a map from blocks to bitvectors where the bitvectors represent which
+  // allocas are live in/out of that block.
+  SmallPtrSet<MachineBasicBlock*, 8> BBSet(BasicBlockNumbering.begin(),
+                                           BasicBlockNumbering.end());
+  unsigned NumSSMIters = 0;
+  bool changed = true;
+  while (changed) {
+    changed = false;
+    ++NumSSMIters;
+
+    SmallPtrSet<MachineBasicBlock*, 8> NextBBSet;
+
+    for (SmallVector<MachineBasicBlock*, 8>::iterator
+         PI = BasicBlockNumbering.begin(), PE = BasicBlockNumbering.end();
+         PI != PE; ++PI) {
+
+      MachineBasicBlock *BB = *PI;
+      if (!BBSet.count(BB)) continue;
+
+      BitVector LocalLiveIn;
+      BitVector LocalLiveOut;
+
+      // Forward propagation from begins to ends.
+      for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
+           PE = BB->pred_end(); PI != PE; ++PI)
+        LocalLiveIn |= BlockLiveness[*PI].LiveOut;
+      LocalLiveIn |= BlockLiveness[BB].End;
+      LocalLiveIn.reset(BlockLiveness[BB].Begin);
+
+      // Reverse propagation from ends to begins.
+      for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+           SE = BB->succ_end(); SI != SE; ++SI)
+        LocalLiveOut |= BlockLiveness[*SI].LiveIn;
+      LocalLiveOut |= BlockLiveness[BB].Begin;
+      LocalLiveOut.reset(BlockLiveness[BB].End);
+
+      LocalLiveIn |= LocalLiveOut;
+      LocalLiveOut |= LocalLiveIn;
+
+      // After adopting the live bits, we need to turn-off the bits which
+      // are de-activated in this block.
+      LocalLiveOut.reset(BlockLiveness[BB].End);
+      LocalLiveIn.reset(BlockLiveness[BB].Begin);
+
+      // If we have both BEGIN and END markers in the same basic block then
+      // we know that the BEGIN marker comes after the END, because we already
+      // handle the case where the BEGIN comes before the END when collecting
+      // the markers (and building the BEGIN/END vectore).
+      // Want to enable the LIVE_IN and LIVE_OUT of slots that have both
+      // BEGIN and END because it means that the value lives before and after
+      // this basic block.
+      BitVector LocalEndBegin = BlockLiveness[BB].End;
+      LocalEndBegin &= BlockLiveness[BB].Begin;
+      LocalLiveIn |= LocalEndBegin;
+      LocalLiveOut |= LocalEndBegin;
+
+      if (LocalLiveIn.test(BlockLiveness[BB].LiveIn)) {
+        changed = true;
+        BlockLiveness[BB].LiveIn |= LocalLiveIn;
+
+        for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
+             PE = BB->pred_end(); PI != PE; ++PI)
+          NextBBSet.insert(*PI);
+      }
+
+      if (LocalLiveOut.test(BlockLiveness[BB].LiveOut)) {
+        changed = true;
+        BlockLiveness[BB].LiveOut |= LocalLiveOut;
+
+        for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+             SE = BB->succ_end(); SI != SE; ++SI)
+          NextBBSet.insert(*SI);
+      }
+    }
+
+    BBSet = NextBBSet;
+  }// while changed.
+}
+
+void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
+  SmallVector<SlotIndex, 16> Starts;
+  SmallVector<SlotIndex, 16> Finishes;
+
+  // For each block, find which slots are active within this block
+  // and update the live intervals.
+  for (MachineFunction::iterator MBB = MF->begin(), MBBe = MF->end();
+       MBB != MBBe; ++MBB) {
+    Starts.clear();
+    Starts.resize(NumSlots);
+    Finishes.clear();
+    Finishes.resize(NumSlots);
+
+    // Create the interval for the basic blocks with lifetime markers in them.
+    for (SmallVector<MachineInstr*, 8>::iterator it = Markers.begin(),
+         e = Markers.end(); it != e; ++it) {
+      MachineInstr *MI = *it;
+      if (MI->getParent() != MBB)
+        continue;
+
+      assert((MI->getOpcode() == TargetOpcode::LIFETIME_START ||
+              MI->getOpcode() == TargetOpcode::LIFETIME_END) &&
+             "Invalid Lifetime marker");
+
+      bool IsStart = MI->getOpcode() == TargetOpcode::LIFETIME_START;
+      MachineOperand &Mo = MI->getOperand(0);
+      int Slot = Mo.getIndex();
+      assert(Slot >= 0 && "Invalid slot");
+
+      SlotIndex ThisIndex = Indexes->getInstructionIndex(MI);
+
+      if (IsStart) {
+        if (!Starts[Slot].isValid() || Starts[Slot] > ThisIndex)
+          Starts[Slot] = ThisIndex;
+      } else {
+        if (!Finishes[Slot].isValid() || Finishes[Slot] < ThisIndex)
+          Finishes[Slot] = ThisIndex;
+      }
+    }
+
+    // Create the interval of the blocks that we previously found to be 'alive'.
+    BitVector Alive = BlockLiveness[MBB].LiveIn;
+    Alive |= BlockLiveness[MBB].LiveOut;
+
+    if (Alive.any()) {
+      for (int pos = Alive.find_first(); pos != -1;
+           pos = Alive.find_next(pos)) {
+        if (!Starts[pos].isValid())
+          Starts[pos] = Indexes->getMBBStartIdx(MBB);
+        if (!Finishes[pos].isValid())
+          Finishes[pos] = Indexes->getMBBEndIdx(MBB);
+      }
+    }
+
+    for (unsigned i = 0; i < NumSlots; ++i) {
+      assert(Starts[i].isValid() == Finishes[i].isValid() && "Unmatched range");
+      if (!Starts[i].isValid())
+        continue;
+
+      assert(Starts[i] && Finishes[i] && "Invalid interval");
+      VNInfo *ValNum = Intervals[i]->getValNumInfo(0);
+      SlotIndex S = Starts[i];
+      SlotIndex F = Finishes[i];
+      if (S < F) {
+        // We have a single consecutive region.
+        Intervals[i]->addRange(LiveRange(S, F, ValNum));
+      } else {
+        // We have two non consecutive regions. This happens when
+        // LIFETIME_START appears after the LIFETIME_END marker.
+        SlotIndex NewStart = Indexes->getMBBStartIdx(MBB);
+        SlotIndex NewFin = Indexes->getMBBEndIdx(MBB);
+        Intervals[i]->addRange(LiveRange(NewStart, F, ValNum));
+        Intervals[i]->addRange(LiveRange(S, NewFin, ValNum));
+      }
+    }
+  }
+}
+
+bool StackColoring::removeAllMarkers() {
+  unsigned Count = 0;
+  for (unsigned i = 0; i < Markers.size(); ++i) {
+    Markers[i]->eraseFromParent();
+    Count++;
+  }
+  Markers.clear();
+
+  DEBUG(dbgs()<<"Removed "<<Count<<" markers.\n");
+  return Count;
+}
+
+void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
+  unsigned FixedInstr = 0;
+  unsigned FixedMemOp = 0;
+  unsigned FixedDbg = 0;
+  MachineModuleInfo *MMI = &MF->getMMI();
+
+  // Remap debug information that refers to stack slots.
+  MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo();
+  for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(),
+       VE = VMap.end(); VI != VE; ++VI) {
+    const MDNode *Var = VI->first;
+    if (!Var) continue;
+    std::pair<unsigned, DebugLoc> &VP = VI->second;
+    if (SlotRemap.count(VP.first)) {
+      DEBUG(dbgs()<<"Remapping debug info for ["<<Var->getName()<<"].\n");
+      VP.first = SlotRemap[VP.first];
+      FixedDbg++;
+    }
+  }
+
+  // Keep a list of *allocas* which need to be remapped.
+  DenseMap<const AllocaInst*, const AllocaInst*> Allocas;
+  for (DenseMap<int, int>::iterator it = SlotRemap.begin(),
+       e = SlotRemap.end(); it != e; ++it) {
+    const AllocaInst *From = MFI->getObjectAllocation(it->first);
+    const AllocaInst *To = MFI->getObjectAllocation(it->second);
+    assert(To && From && "Invalid allocation object");
+    Allocas[From] = To;
+  }
+
+  // Remap all instructions to the new stack slots.
+  MachineFunction::iterator BB, BBE;
+  MachineBasicBlock::iterator I, IE;
+  for (BB = MF->begin(), BBE = MF->end(); BB != BBE; ++BB)
+    for (I = BB->begin(), IE = BB->end(); I != IE; ++I) {
+
+      // Skip lifetime markers. We'll remove them soon.
+      if (I->getOpcode() == TargetOpcode::LIFETIME_START ||
+          I->getOpcode() == TargetOpcode::LIFETIME_END)
+        continue;
+
+      // Update the MachineMemOperand to use the new alloca.
+      for (MachineInstr::mmo_iterator MM = I->memoperands_begin(),
+           E = I->memoperands_end(); MM != E; ++MM) {
+        MachineMemOperand *MMO = *MM;
+
+        const Value *V = MMO->getValue();
+
+        if (!V)
+          continue;
+
+        // Climb up and find the original alloca.
+        V = GetUnderlyingObject(V);
+        // If we did not find one, or if the one that we found is not in our
+        // map, then move on.
+        if (!V || !isa<AllocaInst>(V)) {
+          // Clear mem operand since we don't know for sure that it doesn't
+          // alias a merged alloca.
+          MMO->setValue(0);
+          continue;
+        }
+        const AllocaInst *AI= cast<AllocaInst>(V);
+        if (!Allocas.count(AI))
+          continue;
+
+        MMO->setValue(Allocas[AI]);
+        FixedMemOp++;
+      }
+
+      // Update all of the machine instruction operands.
+      for (unsigned i = 0 ; i <  I->getNumOperands(); ++i) {
+        MachineOperand &MO = I->getOperand(i);
+
+        if (!MO.isFI())
+          continue;
+        int FromSlot = MO.getIndex();
+
+        // Don't touch arguments.
+        if (FromSlot<0)
+          continue;
+
+        // Only look at mapped slots.
+        if (!SlotRemap.count(FromSlot))
+          continue;
+
+        // In a debug build, check that the instruction that we are modifying is
+        // inside the expected live range. If the instruction is not inside
+        // the calculated range then it means that the alloca usage moved
+        // outside of the lifetime markers, or that the user has a bug.
+        // NOTE: Alloca address calculations which happen outside the lifetime
+        // zone are are okay, despite the fact that we don't have a good way
+        // for validating all of the usages of the calculation.
+#ifndef NDEBUG
+        bool TouchesMemory = I->mayLoad() || I->mayStore();
+        // If we *don't* protect the user from escaped allocas, don't bother
+        // validating the instructions.
+        if (!I->isDebugValue() && TouchesMemory && ProtectFromEscapedAllocas) {
+          SlotIndex Index = Indexes->getInstructionIndex(I);
+          LiveInterval *Interval = Intervals[FromSlot];
+          assert(Interval->find(Index) != Interval->end() &&
+               "Found instruction usage outside of live range.");
+        }
+#endif
+
+        // Fix the machine instructions.
+        int ToSlot = SlotRemap[FromSlot];
+        MO.setIndex(ToSlot);
+        FixedInstr++;
+      }
+    }
+
+  DEBUG(dbgs()<<"Fixed "<<FixedMemOp<<" machine memory operands.\n");
+  DEBUG(dbgs()<<"Fixed "<<FixedDbg<<" debug locations.\n");
+  DEBUG(dbgs()<<"Fixed "<<FixedInstr<<" machine instructions.\n");
+}
+
+void StackColoring::removeInvalidSlotRanges() {
+  MachineFunction::iterator BB, BBE;
+  MachineBasicBlock::iterator I, IE;
+  for (BB = MF->begin(), BBE = MF->end(); BB != BBE; ++BB)
+    for (I = BB->begin(), IE = BB->end(); I != IE; ++I) {
+
+      if (I->getOpcode() == TargetOpcode::LIFETIME_START ||
+          I->getOpcode() == TargetOpcode::LIFETIME_END || I->isDebugValue())
+        continue;
+
+      // Some intervals are suspicious! In some cases we find address
+      // calculations outside of the lifetime zone, but not actual memory
+      // read or write. Memory accesses outside of the lifetime zone are a clear
+      // violation, but address calculations are okay. This can happen when
+      // GEPs are hoisted outside of the lifetime zone.
+      // So, in here we only check instructions which can read or write memory.
+      if (!I->mayLoad() && !I->mayStore())
+        continue;
+
+      // Check all of the machine operands.
+      for (unsigned i = 0 ; i <  I->getNumOperands(); ++i) {
+        MachineOperand &MO = I->getOperand(i);
+
+        if (!MO.isFI())
+          continue;
+
+        int Slot = MO.getIndex();
+
+        if (Slot<0)
+          continue;
+
+        if (Intervals[Slot]->empty())
+          continue;
+
+        // Check that the used slot is inside the calculated lifetime range.
+        // If it is not, warn about it and invalidate the range.
+        LiveInterval *Interval = Intervals[Slot];
+        SlotIndex Index = Indexes->getInstructionIndex(I);
+        if (Interval->find(Index) == Interval->end()) {
+          Intervals[Slot]->clear();
+          DEBUG(dbgs()<<"Invalidating range #"<<Slot<<"\n");
+          EscapedAllocas++;
+        }
+      }
+    }
+}
+
+void StackColoring::expungeSlotMap(DenseMap<int, int> &SlotRemap,
+                                   unsigned NumSlots) {
+  // Expunge slot remap map.
+  for (unsigned i=0; i < NumSlots; ++i) {
+    // If we are remapping i
+    if (SlotRemap.count(i)) {
+      int Target = SlotRemap[i];
+      // As long as our target is mapped to something else, follow it.
+      while (SlotRemap.count(Target)) {
+        Target = SlotRemap[Target];
+        SlotRemap[i] = Target;
+      }
+    }
+  }
+}
+
+bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
+  DEBUG(dbgs() << "********** Stack Coloring **********\n"
+               << "********** Function: "
+               << ((const Value*)Func.getFunction())->getName() << '\n');
+  MF = &Func;
+  MFI = MF->getFrameInfo();
+  Indexes = &getAnalysis<SlotIndexes>();
+  BlockLiveness.clear();
+  BasicBlocks.clear();
+  BasicBlockNumbering.clear();
+  Markers.clear();
+  Intervals.clear();
+  VNInfoAllocator.Reset();
+
+  unsigned NumSlots = MFI->getObjectIndexEnd();
+
+  // If there are no stack slots then there are no markers to remove.
+  if (!NumSlots)
+    return false;
+
+  SmallVector<int, 8> SortedSlots;
+
+  SortedSlots.reserve(NumSlots);
+  Intervals.reserve(NumSlots);
+
+  unsigned NumMarkers = collectMarkers(NumSlots);
+
+  unsigned TotalSize = 0;
+  DEBUG(dbgs()<<"Found "<<NumMarkers<<" markers and "<<NumSlots<<" slots\n");
+  DEBUG(dbgs()<<"Slot structure:\n");
+
+  for (int i=0; i < MFI->getObjectIndexEnd(); ++i) {
+    DEBUG(dbgs()<<"Slot #"<<i<<" - "<<MFI->getObjectSize(i)<<" bytes.\n");
+    TotalSize += MFI->getObjectSize(i);
+  }
+
+  DEBUG(dbgs()<<"Total Stack size: "<<TotalSize<<" bytes\n\n");
+
+  // Don't continue because there are not enough lifetime markers, or the
+  // stack is too small, or we are told not to optimize the slots.
+  if (NumMarkers < 2 || TotalSize < 16 || DisableColoring) {
+    DEBUG(dbgs()<<"Will not try to merge slots.\n");
+    return removeAllMarkers();
+  }
+
+  for (unsigned i=0; i < NumSlots; ++i) {
+    LiveInterval *LI = new LiveInterval(i, 0);
+    Intervals.push_back(LI);
+    LI->getNextValue(Indexes->getZeroIndex(), VNInfoAllocator);
+    SortedSlots.push_back(i);
+  }
+
+  // Calculate the liveness of each block.
+  calculateLocalLiveness();
+
+  // Propagate the liveness information.
+  calculateLiveIntervals(NumSlots);
+
+  // Search for allocas which are used outside of the declared lifetime
+  // markers.
+  if (ProtectFromEscapedAllocas)
+    removeInvalidSlotRanges();
+
+  // Maps old slots to new slots.
+  DenseMap<int, int> SlotRemap;
+  unsigned RemovedSlots = 0;
+  unsigned ReducedSize = 0;
+
+  // Do not bother looking at empty intervals.
+  for (unsigned I = 0; I < NumSlots; ++I) {
+    if (Intervals[SortedSlots[I]]->empty())
+      SortedSlots[I] = -1;
+  }
+
+  // This is a simple greedy algorithm for merging allocas. First, sort the
+  // slots, placing the largest slots first. Next, perform an n^2 scan and look
+  // for disjoint slots. When you find disjoint slots, merge the samller one
+  // into the bigger one and update the live interval. Remove the small alloca
+  // and continue.
+
+  // Sort the slots according to their size. Place unused slots at the end.
+  std::sort(SortedSlots.begin(), SortedSlots.end(), SlotSizeSorter(MFI));
+
+  bool Chanded = true;
+  while (Chanded) {
+    Chanded = false;
+    for (unsigned I = 0; I < NumSlots; ++I) {
+      if (SortedSlots[I] == -1)
+        continue;
+
+      for (unsigned J=I+1; J < NumSlots; ++J) {
+        if (SortedSlots[J] == -1)
+          continue;
+
+        int FirstSlot = SortedSlots[I];
+        int SecondSlot = SortedSlots[J];
+        LiveInterval *First = Intervals[FirstSlot];
+        LiveInterval *Second = Intervals[SecondSlot];
+        assert (!First->empty() && !Second->empty() && "Found an empty range");
+
+        // Merge disjoint slots.
+        if (!First->overlaps(*Second)) {
+          Chanded = true;
+          First->MergeRangesInAsValue(*Second, First->getValNumInfo(0));
+          SlotRemap[SecondSlot] = FirstSlot;
+          SortedSlots[J] = -1;
+          DEBUG(dbgs()<<"Merging #"<<FirstSlot<<" and slots #"<<
+                SecondSlot<<" together.\n");
+          unsigned MaxAlignment = std::max(MFI->getObjectAlignment(FirstSlot),
+                                           MFI->getObjectAlignment(SecondSlot));
+
+          assert(MFI->getObjectSize(FirstSlot) >=
+                 MFI->getObjectSize(SecondSlot) &&
+                 "Merging a small object into a larger one");
+
+          RemovedSlots+=1;
+          ReducedSize += MFI->getObjectSize(SecondSlot);
+          MFI->setObjectAlignment(FirstSlot, MaxAlignment);
+          MFI->RemoveStackObject(SecondSlot);
+        }
+      }
+    }
+  }// While changed.
+
+  // Record statistics.
+  StackSpaceSaved += ReducedSize;
+  StackSlotMerged += RemovedSlots;
+  DEBUG(dbgs()<<"Merge "<<RemovedSlots<<" slots. Saved "<<
+        ReducedSize<<" bytes\n");
+
+  // Scan the entire function and update all machine operands that use frame
+  // indices to use the remapped frame index.
+  expungeSlotMap(SlotRemap, NumSlots);
+  remapInstructions(SlotRemap);
+
+  // Release the intervals.
+  for (unsigned I = 0; I < NumSlots; ++I) {
+    delete Intervals[I];
+  }
+
+  return removeAllMarkers();
+}
diff --git a/contrib/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm/lib/CodeGen/StackProtector.cpp
index f1eab1f..31e9ec0 100644
--- a/contrib/llvm/lib/CodeGen/StackProtector.cpp
+++ b/contrib/llvm/lib/CodeGen/StackProtector.cpp
@@ -26,18 +26,12 @@
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
 #include "llvm/ADT/Triple.h"
 using namespace llvm;
 
-// SSPBufferSize - The lower bound for a buffer to be considered for stack
-// smashing protection.
-static cl::opt<unsigned>
-SSPBufferSize("stack-protector-buffer-size", cl::init(8),
-              cl::desc("Lower bound for a buffer to be considered for "
-                       "stack protection"));
-
 namespace {
   class StackProtector : public FunctionPass {
     /// TLI - Keep a pointer of a TargetLowering to consult for determining
@@ -61,6 +55,11 @@ namespace {
     /// check fails.
     BasicBlock *CreateFailBB();
 
+    /// ContainsProtectableArray - Check whether the type either is an array or
+    /// contains an array of sufficient size so that we need stack protectors
+    /// for it.
+    bool ContainsProtectableArray(Type *Ty, bool InStruct = false) const;
+
     /// RequiresStackProtector - Check whether or not this function needs a
     /// stack protector based upon the stack protector level.
     bool RequiresStackProtector() const;
@@ -100,21 +99,50 @@ bool StackProtector::runOnFunction(Function &Fn) {
   return InsertStackProtectors();
 }
 
+/// ContainsProtectableArray - Check whether the type either is an array or
+/// contains a char array of sufficient size so that we need stack protectors
+/// for it.
+bool StackProtector::ContainsProtectableArray(Type *Ty, bool InStruct) const {
+  if (!Ty) return false;
+  if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
+    const TargetMachine &TM = TLI->getTargetMachine();
+    if (!AT->getElementType()->isIntegerTy(8)) {
+      Triple Trip(TM.getTargetTriple());
+
+      // If we're on a non-Darwin platform or we're inside of a structure, don't
+      // add stack protectors unless the array is a character array.
+      if (InStruct || !Trip.isOSDarwin())
+          return false;
+    }
+
+    // If an array has more than SSPBufferSize bytes of allocated space, then we
+    // emit stack protectors.
+    if (TM.Options.SSPBufferSize <= TLI->getDataLayout()->getTypeAllocSize(AT))
+      return true;
+  }
+
+  const StructType *ST = dyn_cast<StructType>(Ty);
+  if (!ST) return false;
+
+  for (StructType::element_iterator I = ST->element_begin(),
+         E = ST->element_end(); I != E; ++I)
+    if (ContainsProtectableArray(*I, true))
+      return true;
+
+  return false;
+}
+
 /// RequiresStackProtector - Check whether or not this function needs a stack
 /// protector based upon the stack protector level. The heuristic we use is to
 /// add a guard variable to functions that call alloca, and functions with
 /// buffers larger than SSPBufferSize bytes.
 bool StackProtector::RequiresStackProtector() const {
-  if (F->hasFnAttr(Attribute::StackProtectReq))
+  if (F->getFnAttributes().hasAttribute(Attributes::StackProtectReq))
     return true;
 
-  if (!F->hasFnAttr(Attribute::StackProtect))
+  if (!F->getFnAttributes().hasAttribute(Attributes::StackProtect))
     return false;
 
-  const TargetData *TD = TLI->getTargetData();
-  const TargetMachine &TM = TLI->getTargetMachine();
-  Triple Trip(TM.getTargetTriple());
-
   for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
     BasicBlock *BB = I;
 
@@ -126,17 +154,8 @@ bool StackProtector::RequiresStackProtector() const {
           // protectors.
           return true;
 
-        if (ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType())) {
-          // If we're on a non-Darwin platform, don't add stack protectors
-          // unless the array is a character array.
-          if (!Trip.isOSDarwin() && !AT->getElementType()->isIntegerTy(8))
-            continue;
-
-          // If an array has more than SSPBufferSize bytes of allocated space,
-          // then we emit stack protectors.
-          if (SSPBufferSize <= TD->getTypeAllocSize(AT))
-            return true;
-        }
+        if (ContainsProtectableArray(AI->getAllocatedType()))
+          return true;
       }
   }
 
diff --git a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
index 20da36e..d349abc 100644
--- a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
+++ b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -11,8 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "stackcoloring"
-#include "llvm/Function.h"
+#define DEBUG_TYPE "stackslotcoloring"
 #include "llvm/Module.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
@@ -391,8 +390,7 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
 bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) {
   DEBUG({
       dbgs() << "********** Stack Slot Coloring **********\n"
-             << "********** Function: "
-             << MF.getFunction()->getName() << '\n';
+             << "********** Function: " << MF.getName() << '\n';
     });
 
   MFI = MF.getFrameInfo();
diff --git a/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp b/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp
index 5b06195..39fd600 100644
--- a/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp
+++ b/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp
@@ -404,9 +404,9 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) {
 }
 
 void StrongPHIElimination::addReg(unsigned Reg) {
-  if (RegNodeMap.count(Reg))
-    return;
-  RegNodeMap[Reg] = new (Allocator) Node(Reg);
+  Node *&N = RegNodeMap[Reg];
+  if (!N)
+    N = new (Allocator) Node(Reg);
 }
 
 StrongPHIElimination::Node*
@@ -714,8 +714,9 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI,
         assert(getRegColor(CopyReg) == CopyReg);
       }
 
-      if (!InsertedSrcCopyMap.count(std::make_pair(PredBB, PHIColor)))
-        InsertedSrcCopyMap[std::make_pair(PredBB, PHIColor)] = CopyInstr;
+      // Insert into map if not already there.
+      InsertedSrcCopyMap.insert(std::make_pair(std::make_pair(PredBB, PHIColor),
+                                               CopyInstr));
     }
 
     SrcMO.setReg(CopyReg);
diff --git a/contrib/llvm/lib/CodeGen/TailDuplication.cpp b/contrib/llvm/lib/CodeGen/TailDuplication.cpp
index a813fa6..1497d1b 100644
--- a/contrib/llvm/lib/CodeGen/TailDuplication.cpp
+++ b/contrib/llvm/lib/CodeGen/TailDuplication.cpp
@@ -552,7 +552,8 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,
   // compensate for the duplication.
   unsigned MaxDuplicateCount;
   if (TailDuplicateSize.getNumOccurrences() == 0 &&
-      MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+      MF.getFunction()->getFnAttributes().
+        hasAttribute(Attributes::OptimizeForSize))
     MaxDuplicateCount = 1;
   else
     MaxDuplicateCount = TailDuplicateSize;
diff --git a/contrib/llvm/lib/CodeGen/TargetInstrInfoImpl.cpp b/contrib/llvm/lib/CodeGen/TargetInstrInfoImpl.cpp
index ddee6b2..4439192 100644
--- a/contrib/llvm/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -99,17 +99,8 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
 
   if (NewMI) {
     // Create a new instruction.
-    bool Reg0IsDead = HasDef ? MI->getOperand(0).isDead() : false;
     MachineFunction &MF = *MI->getParent()->getParent();
-    if (HasDef)
-      return BuildMI(MF, MI->getDebugLoc(), MI->getDesc())
-        .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead), SubReg0)
-        .addReg(Reg2, getKillRegState(Reg2IsKill), SubReg2)
-        .addReg(Reg1, getKillRegState(Reg1IsKill), SubReg1);
-    else
-      return BuildMI(MF, MI->getDebugLoc(), MI->getDesc())
-        .addReg(Reg2, getKillRegState(Reg2IsKill), SubReg2)
-        .addReg(Reg1, getKillRegState(Reg1IsKill), SubReg1);
+    MI = MF.CloneMachineInstr(MI);
   }
 
   if (HasDef) {
@@ -572,6 +563,8 @@ TargetInstrInfoImpl::getNumMicroOps(const InstrItineraryData *ItinData,
 /// Return the default expected latency for a def based on it's opcode.
 unsigned TargetInstrInfo::defaultDefLatency(const MCSchedModel *SchedModel,
                                             const MachineInstr *DefMI) const {
+  if (DefMI->isTransient())
+    return 0;
   if (DefMI->mayLoad())
     return SchedModel->LoadLatency;
   if (isHighLatencyDef(DefMI->getOpcode()))
@@ -615,13 +608,13 @@ getOperandLatency(const InstrItineraryData *ItinData,
 
 /// If we can determine the operand latency from the def only, without itinerary
 /// lookup, do so. Otherwise return -1.
-static int computeDefOperandLatency(
-  const TargetInstrInfo *TII, const InstrItineraryData *ItinData,
-  const MachineInstr *DefMI, bool FindMin) {
+int TargetInstrInfo::computeDefOperandLatency(
+  const InstrItineraryData *ItinData,
+  const MachineInstr *DefMI, bool FindMin) const {
 
   // Let the target hook getInstrLatency handle missing itineraries.
   if (!ItinData)
-    return TII->getInstrLatency(ItinData, DefMI);
+    return getInstrLatency(ItinData, DefMI);
 
   // Return a latency based on the itinerary properties and defining instruction
   // if possible. Some common subtargets don't require per-operand latency,
@@ -630,7 +623,7 @@ static int computeDefOperandLatency(
     // If MinLatency is valid, call getInstrLatency. This uses Stage latency if
     // it exists before defaulting to MinLatency.
     if (ItinData->SchedModel->MinLatency >= 0)
-      return TII->getInstrLatency(ItinData, DefMI);
+      return getInstrLatency(ItinData, DefMI);
 
     // If MinLatency is invalid, OperandLatency is interpreted as MinLatency.
     // For empty itineraries, short-cirtuit the check and default to one cycle.
@@ -638,29 +631,42 @@ static int computeDefOperandLatency(
       return 1;
   }
   else if(ItinData->isEmpty())
-    return TII->defaultDefLatency(ItinData->SchedModel, DefMI);
+    return defaultDefLatency(ItinData->SchedModel, DefMI);
 
   // ...operand lookup required
   return -1;
 }
 
 /// computeOperandLatency - Compute and return the latency of the given data
-/// dependent def and use when the operand indices are already known.
+/// dependent def and use when the operand indices are already known. UseMI may
+/// be NULL for an unknown use.
+///
+/// FindMin may be set to get the minimum vs. expected latency. Minimum
+/// latency is used for scheduling groups, while expected latency is for
+/// instruction cost and critical path.
 ///
-/// FindMin may be set to get the minimum vs. expected latency.
+/// Depending on the subtarget's itinerary properties, this may or may not need
+/// to call getOperandLatency(). For most subtargets, we don't need DefIdx or
+/// UseIdx to compute min latency.
 unsigned TargetInstrInfo::
 computeOperandLatency(const InstrItineraryData *ItinData,
                       const MachineInstr *DefMI, unsigned DefIdx,
                       const MachineInstr *UseMI, unsigned UseIdx,
                       bool FindMin) const {
 
-  int DefLatency = computeDefOperandLatency(this, ItinData, DefMI, FindMin);
+  int DefLatency = computeDefOperandLatency(ItinData, DefMI, FindMin);
   if (DefLatency >= 0)
     return DefLatency;
 
   assert(ItinData && !ItinData->isEmpty() && "computeDefOperandLatency fail");
 
-  int OperLatency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
+  int OperLatency = 0;
+  if (UseMI)
+    OperLatency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
+  else {
+    unsigned DefClass = DefMI->getDesc().getSchedClass();
+    OperLatency = ItinData->getOperandCycle(DefClass, DefIdx);
+  }
   if (OperLatency >= 0)
     return OperLatency;
 
@@ -673,77 +679,3 @@ computeOperandLatency(const InstrItineraryData *ItinData,
                             defaultDefLatency(ItinData->SchedModel, DefMI));
   return InstrLatency;
 }
-
-/// computeOperandLatency - Compute and return the latency of the given data
-/// dependent def and use. DefMI must be a valid def. UseMI may be NULL for an
-/// unknown use. Depending on the subtarget's itinerary properties, this may or
-/// may not need to call getOperandLatency().
-///
-/// FindMin may be set to get the minimum vs. expected latency. Minimum
-/// latency is used for scheduling groups, while expected latency is for
-/// instruction cost and critical path.
-///
-/// For most subtargets, we don't need DefIdx or UseIdx to compute min latency.
-/// DefMI must be a valid definition, but UseMI may be NULL for an unknown use.
-unsigned TargetInstrInfo::
-computeOperandLatency(const InstrItineraryData *ItinData,
-                      const TargetRegisterInfo *TRI,
-                      const MachineInstr *DefMI, const MachineInstr *UseMI,
-                      unsigned Reg, bool FindMin) const {
-
-  int DefLatency = computeDefOperandLatency(this, ItinData, DefMI, FindMin);
-  if (DefLatency >= 0)
-    return DefLatency;
-
-  assert(ItinData && !ItinData->isEmpty() && "computeDefOperandLatency fail");
-
-  // Find the definition of the register in the defining instruction.
-  int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
-  if (DefIdx != -1) {
-    const MachineOperand &MO = DefMI->getOperand(DefIdx);
-    if (MO.isReg() && MO.isImplicit() &&
-        DefIdx >= (int)DefMI->getDesc().getNumOperands()) {
-      // This is an implicit def, getOperandLatency() won't return the correct
-      // latency. e.g.
-      //   %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ..., %Q3<imp-def>
-      //   %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ...
-      // What we want is to compute latency between def of %D6/%D7 and use of
-      // %Q3 instead.
-      unsigned Op2 = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI);
-      if (DefMI->getOperand(Op2).isReg())
-        DefIdx = Op2;
-    }
-    // For all uses of the register, calculate the maxmimum latency
-    int OperLatency = -1;
-
-    // UseMI is null, then it must be a scheduling barrier.
-    if (!UseMI) {
-      unsigned DefClass = DefMI->getDesc().getSchedClass();
-      OperLatency = ItinData->getOperandCycle(DefClass, DefIdx);
-    }
-    else {
-      for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
-        const MachineOperand &MO = UseMI->getOperand(i);
-        if (!MO.isReg() || !MO.isUse())
-          continue;
-        unsigned MOReg = MO.getReg();
-        if (MOReg != Reg)
-          continue;
-
-        int UseCycle = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, i);
-        OperLatency = std::max(OperLatency, UseCycle);
-      }
-    }
-    // If we found an operand latency, we're done.
-    if (OperLatency >= 0)
-      return OperLatency;
-  }
-  // No operand latency was found.
-  unsigned InstrLatency = getInstrLatency(ItinData, DefMI);
-
-  // Expected latency is the max of the stage latency and itinerary props.
-  if (!FindMin)
-    InstrLatency = std::max(InstrLatency,
-                            defaultDefLatency(ItinData->SchedModel, DefMI));
-  return InstrLatency;
-}
diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 2a2fa9e..8f5d770 100644
--- a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -27,7 +27,7 @@
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Support/Dwarf.h"
@@ -77,9 +77,9 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer,
                                                     Flags,
                                                     SectionKind::getDataRel(),
                                                     0, Label->getName());
-  unsigned Size = TM.getTargetData()->getPointerSize();
+  unsigned Size = TM.getDataLayout()->getPointerSize();
   Streamer.SwitchSection(Sec);
-  Streamer.EmitValueToAlignment(TM.getTargetData()->getPointerABIAlignment());
+  Streamer.EmitValueToAlignment(TM.getDataLayout()->getPointerABIAlignment());
   Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject);
   const MCExpr *E = MCConstantExpr::Create(Size, getContext());
   Streamer.EmitELFSize(Label, E);
@@ -247,7 +247,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
     // FIXME: this is getting the alignment of the character, not the
     // alignment of the global!
     unsigned Align =
-      TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV));
+      TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV));
 
     const char *SizeSpec = ".rodata.str1.";
     if (Kind.isMergeable2ByteCString())
@@ -522,14 +522,14 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
 
   // FIXME: Alignment check should be handled by section classifier.
   if (Kind.isMergeable1ByteCString() &&
-      TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32)
+      TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32)
     return CStringSection;
 
   // Do not put 16-bit arrays in the UString section if they have an
   // externally visible label, this runs into issues with certain linker
   // versions.
   if (Kind.isMergeable2ByteCString() && !GV->hasExternalLinkage() &&
-      TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32)
+      TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32)
     return UStringSection;
 
   if (Kind.isMergeableConst()) {
diff --git a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
new file mode 100644
index 0000000..ca3b0e0
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
@@ -0,0 +1,306 @@
+//===-- llvm/Target/TargetSchedule.cpp - Sched Machine Model ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a wrapper around MCSchedModel that allows the interface
+// to benefit from information currently only available in TargetInstrInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static cl::opt<bool> EnableSchedModel("schedmodel", cl::Hidden, cl::init(true),
+  cl::desc("Use TargetSchedModel for latency lookup"));
+
+static cl::opt<bool> EnableSchedItins("scheditins", cl::Hidden, cl::init(true),
+  cl::desc("Use InstrItineraryData for latency lookup"));
+
+bool TargetSchedModel::hasInstrSchedModel() const {
+  return EnableSchedModel && SchedModel.hasInstrSchedModel();
+}
+
+bool TargetSchedModel::hasInstrItineraries() const {
+  return EnableSchedItins && !InstrItins.isEmpty();
+}
+
+static unsigned gcd(unsigned Dividend, unsigned Divisor) {
+  // Dividend and Divisor will be naturally swapped as needed.
+  while(Divisor) {
+    unsigned Rem = Dividend % Divisor;
+    Dividend = Divisor;
+    Divisor = Rem;
+  };
+  return Dividend;
+}
+static unsigned lcm(unsigned A, unsigned B) {
+  unsigned LCM = (uint64_t(A) * B) / gcd(A, B);
+  assert((LCM >= A && LCM >= B) && "LCM overflow");
+  return LCM;
+}
+
+void TargetSchedModel::init(const MCSchedModel &sm,
+                            const TargetSubtargetInfo *sti,
+                            const TargetInstrInfo *tii) {
+  SchedModel = sm;
+  STI = sti;
+  TII = tii;
+  STI->initInstrItins(InstrItins);
+
+  unsigned NumRes = SchedModel.getNumProcResourceKinds();
+  ResourceFactors.resize(NumRes);
+  ResourceLCM = SchedModel.IssueWidth;
+  for (unsigned Idx = 0; Idx < NumRes; ++Idx) {
+    unsigned NumUnits = SchedModel.getProcResource(Idx)->NumUnits;
+    if (NumUnits > 0)
+      ResourceLCM = lcm(ResourceLCM, NumUnits);
+  }
+  MicroOpFactor = ResourceLCM / SchedModel.IssueWidth;
+  for (unsigned Idx = 0; Idx < NumRes; ++Idx) {
+    unsigned NumUnits = SchedModel.getProcResource(Idx)->NumUnits;
+    ResourceFactors[Idx] = NumUnits ? (ResourceLCM / NumUnits) : 0;
+  }
+}
+
+unsigned TargetSchedModel::getNumMicroOps(const MachineInstr *MI,
+                                          const MCSchedClassDesc *SC) const {
+  if (hasInstrItineraries()) {
+    int UOps = InstrItins.getNumMicroOps(MI->getDesc().getSchedClass());
+    return (UOps >= 0) ? UOps : TII->getNumMicroOps(&InstrItins, MI);
+  }
+  if (hasInstrSchedModel()) {
+    if (!SC)
+      SC = resolveSchedClass(MI);
+    if (SC->isValid())
+      return SC->NumMicroOps;
+  }
+  return MI->isTransient() ? 0 : 1;
+}
+
+// The machine model may explicitly specify an invalid latency, which
+// effectively means infinite latency. Since users of the TargetSchedule API
+// don't know how to handle this, we convert it to a very large latency that is
+// easy to distinguish when debugging the DAG but won't induce overflow.
+static unsigned convertLatency(int Cycles) {
+  return Cycles >= 0 ? Cycles : 1000;
+}
+
+/// If we can determine the operand latency from the def only, without machine
+/// model or itinerary lookup, do so. Otherwise return -1.
+int TargetSchedModel::getDefLatency(const MachineInstr *DefMI,
+                                    bool FindMin) const {
+
+  // Return a latency based on the itinerary properties and defining instruction
+  // if possible. Some common subtargets don't require per-operand latency,
+  // especially for minimum latencies.
+  if (FindMin) {
+    // If MinLatency is invalid, then use the itinerary for MinLatency. If no
+    // itinerary exists either, then use single cycle latency.
+    if (SchedModel.MinLatency < 0 && !hasInstrItineraries()) {
+      return 1;
+    }
+    return SchedModel.MinLatency;
+  }
+  else if (!hasInstrSchedModel() && !hasInstrItineraries()) {
+    return TII->defaultDefLatency(&SchedModel, DefMI);
+  }
+  // ...operand lookup required
+  return -1;
+}
+
+/// Return the MCSchedClassDesc for this instruction. Some SchedClasses require
+/// evaluation of predicates that depend on instruction operands or flags.
+const MCSchedClassDesc *TargetSchedModel::
+resolveSchedClass(const MachineInstr *MI) const {
+
+  // Get the definition's scheduling class descriptor from this machine model.
+  unsigned SchedClass = MI->getDesc().getSchedClass();
+  const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass);
+
+#ifndef NDEBUG
+  unsigned NIter = 0;
+#endif
+  while (SCDesc->isVariant()) {
+    assert(++NIter < 6 && "Variants are nested deeper than the magic number");
+
+    SchedClass = STI->resolveSchedClass(SchedClass, MI, this);
+    SCDesc = SchedModel.getSchedClassDesc(SchedClass);
+  }
+  return SCDesc;
+}
+
+/// Find the def index of this operand. This index maps to the machine model and
+/// is independent of use operands. Def operands may be reordered with uses or
+/// merged with uses without affecting the def index (e.g. before/after
+/// regalloc). However, an instruction's def operands must never be reordered
+/// with respect to each other.
+static unsigned findDefIdx(const MachineInstr *MI, unsigned DefOperIdx) {
+  unsigned DefIdx = 0;
+  for (unsigned i = 0; i != DefOperIdx; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (MO.isReg() && MO.isDef())
+      ++DefIdx;
+  }
+  return DefIdx;
+}
+
+/// Find the use index of this operand. This is independent of the instruction's
+/// def operands.
+///
+/// Note that uses are not determined by the operand's isUse property, which
+/// is simply the inverse of isDef. Here we consider any readsReg operand to be
+/// a "use". The machine model allows an operand to be both a Def and Use.
+static unsigned findUseIdx(const MachineInstr *MI, unsigned UseOperIdx) {
+  unsigned UseIdx = 0;
+  for (unsigned i = 0; i != UseOperIdx; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (MO.isReg() && MO.readsReg())
+      ++UseIdx;
+  }
+  return UseIdx;
+}
+
+// Top-level API for clients that know the operand indices.
+unsigned TargetSchedModel::computeOperandLatency(
+  const MachineInstr *DefMI, unsigned DefOperIdx,
+  const MachineInstr *UseMI, unsigned UseOperIdx,
+  bool FindMin) const {
+
+  int DefLatency = getDefLatency(DefMI, FindMin);
+  if (DefLatency >= 0)
+    return DefLatency;
+
+  if (hasInstrItineraries()) {
+    int OperLatency = 0;
+    if (UseMI) {
+      OperLatency =
+        TII->getOperandLatency(&InstrItins, DefMI, DefOperIdx, UseMI, UseOperIdx);
+    }
+    else {
+      unsigned DefClass = DefMI->getDesc().getSchedClass();
+      OperLatency = InstrItins.getOperandCycle(DefClass, DefOperIdx);
+    }
+    if (OperLatency >= 0)
+      return OperLatency;
+
+    // No operand latency was found.
+    unsigned InstrLatency = TII->getInstrLatency(&InstrItins, DefMI);
+
+    // Expected latency is the max of the stage latency and itinerary props.
+    // Rather than directly querying InstrItins stage latency, we call a TII
+    // hook to allow subtargets to specialize latency. This hook is only
+    // applicable to the InstrItins model. InstrSchedModel should model all
+    // special cases without TII hooks.
+    if (!FindMin)
+      InstrLatency = std::max(InstrLatency,
+                              TII->defaultDefLatency(&SchedModel, DefMI));
+    return InstrLatency;
+  }
+  assert(!FindMin && hasInstrSchedModel() &&
+         "Expected a SchedModel for this cpu");
+  const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI);
+  unsigned DefIdx = findDefIdx(DefMI, DefOperIdx);
+  if (DefIdx < SCDesc->NumWriteLatencyEntries) {
+    // Lookup the definition's write latency in SubtargetInfo.
+    const MCWriteLatencyEntry *WLEntry =
+      STI->getWriteLatencyEntry(SCDesc, DefIdx);
+    unsigned WriteID = WLEntry->WriteResourceID;
+    unsigned Latency = convertLatency(WLEntry->Cycles);
+    if (!UseMI)
+      return Latency;
+
+    // Lookup the use's latency adjustment in SubtargetInfo.
+    const MCSchedClassDesc *UseDesc = resolveSchedClass(UseMI);
+    if (UseDesc->NumReadAdvanceEntries == 0)
+      return Latency;
+    unsigned UseIdx = findUseIdx(UseMI, UseOperIdx);
+    return Latency - STI->getReadAdvanceCycles(UseDesc, UseIdx, WriteID);
+  }
+  // If DefIdx does not exist in the model (e.g. implicit defs), then return
+  // unit latency (defaultDefLatency may be too conservative).
+#ifndef NDEBUG
+  if (SCDesc->isValid() && !DefMI->getOperand(DefOperIdx).isImplicit()
+      && !DefMI->getDesc().OpInfo[DefOperIdx].isOptionalDef()) {
+    std::string Err;
+    raw_string_ostream ss(Err);
+    ss << "DefIdx " << DefIdx << " exceeds machine model writes for "
+       << *DefMI;
+    report_fatal_error(ss.str());
+  }
+#endif
+  return DefMI->isTransient() ? 0 : 1;
+}
+
+unsigned TargetSchedModel::computeInstrLatency(const MachineInstr *MI) const {
+  // For the itinerary model, fall back to the old subtarget hook.
+  // Allow subtargets to compute Bundle latencies outside the machine model.
+  if (hasInstrItineraries() || MI->isBundle())
+    return TII->getInstrLatency(&InstrItins, MI);
+
+  if (hasInstrSchedModel()) {
+    const MCSchedClassDesc *SCDesc = resolveSchedClass(MI);
+    if (SCDesc->isValid()) {
+      unsigned Latency = 0;
+      for (unsigned DefIdx = 0, DefEnd = SCDesc->NumWriteLatencyEntries;
+           DefIdx != DefEnd; ++DefIdx) {
+        // Lookup the definition's write latency in SubtargetInfo.
+        const MCWriteLatencyEntry *WLEntry =
+          STI->getWriteLatencyEntry(SCDesc, DefIdx);
+        Latency = std::max(Latency, convertLatency(WLEntry->Cycles));
+      }
+      return Latency;
+    }
+  }
+  return TII->defaultDefLatency(&SchedModel, MI);
+}
+
+unsigned TargetSchedModel::
+computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx,
+                     const MachineInstr *DepMI) const {
+  // MinLatency == -1 is for in-order processors that always have unit
+  // MinLatency. MinLatency > 0 is for in-order processors with varying min
+  // latencies, but since this is not a RAW dep, we always use unit latency.
+  if (SchedModel.MinLatency != 0)
+    return 1;
+
+  // MinLatency == 0 indicates an out-of-order processor that can dispatch
+  // WAW dependencies in the same cycle.
+
+  // Treat predication as a data dependency for out-of-order cpus. In-order
+  // cpus do not need to treat predicated writes specially.
+  //
+  // TODO: The following hack exists because predication passes do not
+  // correctly append imp-use operands, and readsReg() strangely returns false
+  // for predicated defs.
+  unsigned Reg = DefMI->getOperand(DefOperIdx).getReg();
+  const MachineFunction &MF = *DefMI->getParent()->getParent();
+  const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+  if (!DepMI->readsRegister(Reg, TRI) && TII->isPredicated(DepMI))
+    return computeInstrLatency(DefMI);
+
+  // If we have a per operand scheduling model, check if this def is writing
+  // an unbuffered resource. If so, it treated like an in-order cpu.
+  if (hasInstrSchedModel()) {
+    const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI);
+    if (SCDesc->isValid()) {
+      for (const MCWriteProcResEntry *PRI = STI->getWriteProcResBegin(SCDesc),
+             *PRE = STI->getWriteProcResEnd(SCDesc); PRI != PRE; ++PRI) {
+        if (!SchedModel.getProcResource(PRI->ProcResourceIdx)->IsBuffered)
+          return 1;
+      }
+    }
+  }
+  return 0;
+}
diff --git a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index aa601af..a9058bc 100644
--- a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -60,116 +60,108 @@ STATISTIC(NumReSchedUps,       "Number of instructions re-scheduled up");
 STATISTIC(NumReSchedDowns,     "Number of instructions re-scheduled down");
 
 namespace {
-  class TwoAddressInstructionPass : public MachineFunctionPass {
-    MachineFunction *MF;
-    const TargetInstrInfo *TII;
-    const TargetRegisterInfo *TRI;
-    const InstrItineraryData *InstrItins;
-    MachineRegisterInfo *MRI;
-    LiveVariables *LV;
-    SlotIndexes *Indexes;
-    LiveIntervals *LIS;
-    AliasAnalysis *AA;
-    CodeGenOpt::Level OptLevel;
-
-    // DistanceMap - Keep track the distance of a MI from the start of the
-    // current basic block.
-    DenseMap<MachineInstr*, unsigned> DistanceMap;
-
-    // SrcRegMap - A map from virtual registers to physical registers which
-    // are likely targets to be coalesced to due to copies from physical
-    // registers to virtual registers. e.g. v1024 = move r0.
-    DenseMap<unsigned, unsigned> SrcRegMap;
-
-    // DstRegMap - A map from virtual registers to physical registers which
-    // are likely targets to be coalesced to due to copies to physical
-    // registers from virtual registers. e.g. r1 = move v1024.
-    DenseMap<unsigned, unsigned> DstRegMap;
-
-    /// RegSequences - Keep track the list of REG_SEQUENCE instructions seen
-    /// during the initial walk of the machine function.
-    SmallVector<MachineInstr*, 16> RegSequences;
-
-    bool Sink3AddrInstruction(MachineBasicBlock *MBB, MachineInstr *MI,
-                              unsigned Reg,
-                              MachineBasicBlock::iterator OldPos);
-
-    bool NoUseAfterLastDef(unsigned Reg, MachineBasicBlock *MBB, unsigned Dist,
-                           unsigned &LastDef);
-
-    bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
-                               MachineInstr *MI, MachineBasicBlock *MBB,
-                               unsigned Dist);
+class TwoAddressInstructionPass : public MachineFunctionPass {
+  MachineFunction *MF;
+  const TargetInstrInfo *TII;
+  const TargetRegisterInfo *TRI;
+  const InstrItineraryData *InstrItins;
+  MachineRegisterInfo *MRI;
+  LiveVariables *LV;
+  SlotIndexes *Indexes;
+  LiveIntervals *LIS;
+  AliasAnalysis *AA;
+  CodeGenOpt::Level OptLevel;
+
+  // The current basic block being processed.
+  MachineBasicBlock *MBB;
+
+  // DistanceMap - Keep track the distance of a MI from the start of the
+  // current basic block.
+  DenseMap<MachineInstr*, unsigned> DistanceMap;
+
+  // Set of already processed instructions in the current block.
+  SmallPtrSet<MachineInstr*, 8> Processed;
 
-    bool CommuteInstruction(MachineBasicBlock::iterator &mi,
-                            MachineFunction::iterator &mbbi,
-                            unsigned RegB, unsigned RegC, unsigned Dist);
+  // SrcRegMap - A map from virtual registers to physical registers which are
+  // likely targets to be coalesced to due to copies from physical registers to
+  // virtual registers. e.g. v1024 = move r0.
+  DenseMap<unsigned, unsigned> SrcRegMap;
 
-    bool isProfitableToConv3Addr(unsigned RegA, unsigned RegB);
+  // DstRegMap - A map from virtual registers to physical registers which are
+  // likely targets to be coalesced to due to copies to physical registers from
+  // virtual registers. e.g. r1 = move v1024.
+  DenseMap<unsigned, unsigned> DstRegMap;
 
-    bool ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
-                            MachineBasicBlock::iterator &nmi,
-                            MachineFunction::iterator &mbbi,
-                            unsigned RegA, unsigned RegB, unsigned Dist);
+  /// RegSequences - Keep track the list of REG_SEQUENCE instructions seen
+  /// during the initial walk of the machine function.
+  SmallVector<MachineInstr*, 16> RegSequences;
 
-    bool isDefTooClose(unsigned Reg, unsigned Dist,
-                       MachineInstr *MI, MachineBasicBlock *MBB);
+  bool sink3AddrInstruction(MachineInstr *MI, unsigned Reg,
+                            MachineBasicBlock::iterator OldPos);
 
-    bool RescheduleMIBelowKill(MachineBasicBlock *MBB,
-                               MachineBasicBlock::iterator &mi,
-                               MachineBasicBlock::iterator &nmi,
-                               unsigned Reg);
-    bool RescheduleKillAboveMI(MachineBasicBlock *MBB,
-                               MachineBasicBlock::iterator &mi,
-                               MachineBasicBlock::iterator &nmi,
-                               unsigned Reg);
+  bool noUseAfterLastDef(unsigned Reg, unsigned Dist, unsigned &LastDef);
 
-    bool TryInstructionTransform(MachineBasicBlock::iterator &mi,
-                                 MachineBasicBlock::iterator &nmi,
-                                 MachineFunction::iterator &mbbi,
-                                 unsigned SrcIdx, unsigned DstIdx,
-                                 unsigned Dist,
-                                 SmallPtrSet<MachineInstr*, 8> &Processed);
+  bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
+                             MachineInstr *MI, unsigned Dist);
 
-    void ScanUses(unsigned DstReg, MachineBasicBlock *MBB,
-                  SmallPtrSet<MachineInstr*, 8> &Processed);
+  bool commuteInstruction(MachineBasicBlock::iterator &mi,
+                          unsigned RegB, unsigned RegC, unsigned Dist);
 
-    void ProcessCopy(MachineInstr *MI, MachineBasicBlock *MBB,
-                     SmallPtrSet<MachineInstr*, 8> &Processed);
+  bool isProfitableToConv3Addr(unsigned RegA, unsigned RegB);
 
-    typedef SmallVector<std::pair<unsigned, unsigned>, 4> TiedPairList;
-    typedef SmallDenseMap<unsigned, TiedPairList> TiedOperandMap;
-    bool collectTiedOperands(MachineInstr *MI, TiedOperandMap&);
-    void processTiedPairs(MachineInstr *MI, TiedPairList&, unsigned &Dist);
+  bool convertInstTo3Addr(MachineBasicBlock::iterator &mi,
+                          MachineBasicBlock::iterator &nmi,
+                          unsigned RegA, unsigned RegB, unsigned Dist);
 
-    void CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs, unsigned DstReg);
+  bool isDefTooClose(unsigned Reg, unsigned Dist, MachineInstr *MI);
 
-    /// EliminateRegSequences - Eliminate REG_SEQUENCE instructions as part
-    /// of the de-ssa process. This replaces sources of REG_SEQUENCE as
-    /// sub-register references of the register defined by REG_SEQUENCE.
-    bool EliminateRegSequences();
+  bool rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
+                             MachineBasicBlock::iterator &nmi,
+                             unsigned Reg);
+  bool rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
+                             MachineBasicBlock::iterator &nmi,
+                             unsigned Reg);
 
-  public:
-    static char ID; // Pass identification, replacement for typeid
-    TwoAddressInstructionPass() : MachineFunctionPass(ID) {
-      initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry());
-    }
+  bool tryInstructionTransform(MachineBasicBlock::iterator &mi,
+                               MachineBasicBlock::iterator &nmi,
+                               unsigned SrcIdx, unsigned DstIdx,
+                               unsigned Dist);
 
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.setPreservesCFG();
-      AU.addRequired<AliasAnalysis>();
-      AU.addPreserved<LiveVariables>();
-      AU.addPreserved<SlotIndexes>();
-      AU.addPreserved<LiveIntervals>();
-      AU.addPreservedID(MachineLoopInfoID);
-      AU.addPreservedID(MachineDominatorsID);
-      MachineFunctionPass::getAnalysisUsage(AU);
-    }
+  void scanUses(unsigned DstReg);
 
-    /// runOnMachineFunction - Pass entry point.
-    bool runOnMachineFunction(MachineFunction&);
-  };
-}
+  void processCopy(MachineInstr *MI);
+
+  typedef SmallVector<std::pair<unsigned, unsigned>, 4> TiedPairList;
+  typedef SmallDenseMap<unsigned, TiedPairList> TiedOperandMap;
+  bool collectTiedOperands(MachineInstr *MI, TiedOperandMap&);
+  void processTiedPairs(MachineInstr *MI, TiedPairList&, unsigned &Dist);
+
+  /// eliminateRegSequences - Eliminate REG_SEQUENCE instructions as part of
+  /// the de-ssa process. This replaces sources of REG_SEQUENCE as sub-register
+  /// references of the register defined by REG_SEQUENCE.
+  bool eliminateRegSequences();
+
+public:
+  static char ID; // Pass identification, replacement for typeid
+  TwoAddressInstructionPass() : MachineFunctionPass(ID) {
+    initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    AU.setPreservesCFG();
+    AU.addRequired<AliasAnalysis>();
+    AU.addPreserved<LiveVariables>();
+    AU.addPreserved<SlotIndexes>();
+    AU.addPreserved<LiveIntervals>();
+    AU.addPreservedID(MachineLoopInfoID);
+    AU.addPreservedID(MachineDominatorsID);
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  /// runOnMachineFunction - Pass entry point.
+  bool runOnMachineFunction(MachineFunction&);
+};
+} // end anonymous namespace
 
 char TwoAddressInstructionPass::ID = 0;
 INITIALIZE_PASS_BEGIN(TwoAddressInstructionPass, "twoaddressinstruction",
@@ -180,13 +172,13 @@ INITIALIZE_PASS_END(TwoAddressInstructionPass, "twoaddressinstruction",
 
 char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID;
 
-/// Sink3AddrInstruction - A two-address instruction has been converted to a
+/// sink3AddrInstruction - A two-address instruction has been converted to a
 /// three-address instruction to avoid clobbering a register. Try to sink it
 /// past the instruction that would kill the above mentioned register to reduce
 /// register pressure.
-bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
-                                           MachineInstr *MI, unsigned SavedReg,
-                                           MachineBasicBlock::iterator OldPos) {
+bool TwoAddressInstructionPass::
+sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg,
+                     MachineBasicBlock::iterator OldPos) {
   // FIXME: Shouldn't we be trying to do this before we three-addressify the
   // instruction?  After this transformation is done, we no longer need
   // the instruction to be in three-address form.
@@ -299,13 +291,12 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
   return true;
 }
 
-/// NoUseAfterLastDef - Return true if there are no intervening uses between the
+/// noUseAfterLastDef - Return true if there are no intervening uses between the
 /// last instruction in the MBB that defines the specified register and the
 /// two-address instruction which is being processed. It also returns the last
 /// def location by reference
-bool TwoAddressInstructionPass::NoUseAfterLastDef(unsigned Reg,
-                                           MachineBasicBlock *MBB, unsigned Dist,
-                                           unsigned &LastDef) {
+bool TwoAddressInstructionPass::noUseAfterLastDef(unsigned Reg, unsigned Dist,
+                                                  unsigned &LastDef) {
   LastDef = 0;
   unsigned LastUse = Dist;
   for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Reg),
@@ -465,10 +456,9 @@ regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) {
 /// isProfitableToCommute - Return true if it's potentially profitable to commute
 /// the two-address instruction that's being processed.
 bool
-TwoAddressInstructionPass::isProfitableToCommute(unsigned regA, unsigned regB,
-                                       unsigned regC,
-                                       MachineInstr *MI, MachineBasicBlock *MBB,
-                                       unsigned Dist) {
+TwoAddressInstructionPass::
+isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
+                      MachineInstr *MI, unsigned Dist) {
   if (OptLevel == CodeGenOpt::None)
     return false;
 
@@ -516,13 +506,13 @@ TwoAddressInstructionPass::isProfitableToCommute(unsigned regA, unsigned regB,
   // If there is a use of regC between its last def (could be livein) and this
   // instruction, then bail.
   unsigned LastDefC = 0;
-  if (!NoUseAfterLastDef(regC, MBB, Dist, LastDefC))
+  if (!noUseAfterLastDef(regC, Dist, LastDefC))
     return false;
 
   // If there is a use of regB between its last def (could be livein) and this
   // instruction, then go ahead and make this transformation.
   unsigned LastDefB = 0;
-  if (!NoUseAfterLastDef(regB, MBB, Dist, LastDefB))
+  if (!noUseAfterLastDef(regB, Dist, LastDefB))
     return true;
 
   // Since there are no intervening uses for both registers, then commute
@@ -530,13 +520,12 @@ TwoAddressInstructionPass::isProfitableToCommute(unsigned regA, unsigned regB,
   return LastDefB && LastDefC && LastDefC > LastDefB;
 }
 
-/// CommuteInstruction - Commute a two-address instruction and update the basic
+/// commuteInstruction - Commute a two-address instruction and update the basic
 /// block, distance map, and live variables if needed. Return true if it is
 /// successful.
-bool
-TwoAddressInstructionPass::CommuteInstruction(MachineBasicBlock::iterator &mi,
-                               MachineFunction::iterator &mbbi,
-                               unsigned RegB, unsigned RegC, unsigned Dist) {
+bool TwoAddressInstructionPass::
+commuteInstruction(MachineBasicBlock::iterator &mi,
+                   unsigned RegB, unsigned RegC, unsigned Dist) {
   MachineInstr *MI = mi;
   DEBUG(dbgs() << "2addr: COMMUTING  : " << *MI);
   MachineInstr *NewMI = TII->commuteInstruction(MI);
@@ -555,8 +544,8 @@ TwoAddressInstructionPass::CommuteInstruction(MachineBasicBlock::iterator &mi,
     if (Indexes)
       Indexes->replaceMachineInstrInMaps(MI, NewMI);
 
-    mbbi->insert(mi, NewMI);           // Insert the new inst
-    mbbi->erase(mi);                   // Nuke the old inst.
+    MBB->insert(mi, NewMI);           // Insert the new inst
+    MBB->erase(mi);                   // Nuke the old inst.
     mi = NewMI;
     DistanceMap.insert(std::make_pair(NewMI, Dist));
   }
@@ -588,51 +577,51 @@ TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA,unsigned RegB){
   return (ToRegA && !regsAreCompatible(FromRegB, ToRegA, TRI));
 }
 
-/// ConvertInstTo3Addr - Convert the specified two-address instruction into a
+/// convertInstTo3Addr - Convert the specified two-address instruction into a
 /// three address one. Return true if this transformation was successful.
 bool
-TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
+TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi,
                                               MachineBasicBlock::iterator &nmi,
-                                              MachineFunction::iterator &mbbi,
                                               unsigned RegA, unsigned RegB,
                                               unsigned Dist) {
-  MachineInstr *NewMI = TII->convertToThreeAddress(mbbi, mi, LV);
-  if (NewMI) {
-    DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi);
-    DEBUG(dbgs() << "2addr:         TO 3-ADDR: " << *NewMI);
-    bool Sunk = false;
+  // FIXME: Why does convertToThreeAddress() need an iterator reference?
+  MachineFunction::iterator MFI = MBB;
+  MachineInstr *NewMI = TII->convertToThreeAddress(MFI, mi, LV);
+  assert(MBB == MFI && "convertToThreeAddress changed iterator reference");
+  if (!NewMI)
+    return false;
 
-    if (Indexes)
-      Indexes->replaceMachineInstrInMaps(mi, NewMI);
+  DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi);
+  DEBUG(dbgs() << "2addr:         TO 3-ADDR: " << *NewMI);
+  bool Sunk = false;
 
-    if (NewMI->findRegisterUseOperand(RegB, false, TRI))
-      // FIXME: Temporary workaround. If the new instruction doesn't
-      // uses RegB, convertToThreeAddress must have created more
-      // then one instruction.
-      Sunk = Sink3AddrInstruction(mbbi, NewMI, RegB, mi);
+  if (Indexes)
+    Indexes->replaceMachineInstrInMaps(mi, NewMI);
 
-    mbbi->erase(mi); // Nuke the old inst.
+  if (NewMI->findRegisterUseOperand(RegB, false, TRI))
+    // FIXME: Temporary workaround. If the new instruction doesn't
+    // uses RegB, convertToThreeAddress must have created more
+    // then one instruction.
+    Sunk = sink3AddrInstruction(NewMI, RegB, mi);
 
-    if (!Sunk) {
-      DistanceMap.insert(std::make_pair(NewMI, Dist));
-      mi = NewMI;
-      nmi = llvm::next(mi);
-    }
+  MBB->erase(mi); // Nuke the old inst.
 
-    // Update source and destination register maps.
-    SrcRegMap.erase(RegA);
-    DstRegMap.erase(RegB);
-    return true;
+  if (!Sunk) {
+    DistanceMap.insert(std::make_pair(NewMI, Dist));
+    mi = NewMI;
+    nmi = llvm::next(mi);
   }
 
-  return false;
+  // Update source and destination register maps.
+  SrcRegMap.erase(RegA);
+  DstRegMap.erase(RegB);
+  return true;
 }
 
-/// ScanUses - Scan forward recursively for only uses, update maps if the use
+/// scanUses - Scan forward recursively for only uses, update maps if the use
 /// is a copy or a two-address instruction.
 void
-TwoAddressInstructionPass::ScanUses(unsigned DstReg, MachineBasicBlock *MBB,
-                                    SmallPtrSet<MachineInstr*, 8> &Processed) {
+TwoAddressInstructionPass::scanUses(unsigned DstReg) {
   SmallVector<unsigned, 4> VirtRegPairs;
   bool IsDstPhys;
   bool IsCopy = false;
@@ -676,7 +665,7 @@ TwoAddressInstructionPass::ScanUses(unsigned DstReg, MachineBasicBlock *MBB,
   }
 }
 
-/// ProcessCopy - If the specified instruction is not yet processed, process it
+/// processCopy - If the specified instruction is not yet processed, process it
 /// if it's a copy. For a copy instruction, we find the physical registers the
 /// source and destination registers might be mapped to. These are kept in
 /// point-to maps used to determine future optimizations. e.g.
@@ -688,9 +677,7 @@ TwoAddressInstructionPass::ScanUses(unsigned DstReg, MachineBasicBlock *MBB,
 /// coalesced to r0 (from the input side). v1025 is mapped to r1. v1026 is
 /// potentially joined with r1 on the output side. It's worthwhile to commute
 /// 'add' to eliminate a copy.
-void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI,
-                                     MachineBasicBlock *MBB,
-                                     SmallPtrSet<MachineInstr*, 8> &Processed) {
+void TwoAddressInstructionPass::processCopy(MachineInstr *MI) {
   if (Processed.count(MI))
     return;
 
@@ -707,21 +694,20 @@ void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI,
       assert(SrcRegMap[DstReg] == SrcReg &&
              "Can't map to two src physical registers!");
 
-    ScanUses(DstReg, MBB, Processed);
+    scanUses(DstReg);
   }
 
   Processed.insert(MI);
   return;
 }
 
-/// RescheduleMIBelowKill - If there is one more local instruction that reads
+/// rescheduleMIBelowKill - If there is one more local instruction that reads
 /// 'Reg' and it kills 'Reg, consider moving the instruction below the kill
 /// instruction in order to eliminate the need for the copy.
-bool
-TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB,
-                                     MachineBasicBlock::iterator &mi,
-                                     MachineBasicBlock::iterator &nmi,
-                                     unsigned Reg) {
+bool TwoAddressInstructionPass::
+rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
+                      MachineBasicBlock::iterator &nmi,
+                      unsigned Reg) {
   // Bail immediately if we don't have LV available. We use it to find kills
   // efficiently.
   if (!LV)
@@ -853,8 +839,7 @@ TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB,
 /// isDefTooClose - Return true if the re-scheduling will put the given
 /// instruction too close to the defs of its register dependencies.
 bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist,
-                                              MachineInstr *MI,
-                                              MachineBasicBlock *MBB) {
+                                              MachineInstr *MI) {
   for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(Reg),
          DE = MRI->def_end(); DI != DE; ++DI) {
     MachineInstr *DefMI = &*DI;
@@ -873,15 +858,14 @@ bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist,
   return false;
 }
 
-/// RescheduleKillAboveMI - If there is one more local instruction that reads
+/// rescheduleKillAboveMI - If there is one more local instruction that reads
 /// 'Reg' and it kills 'Reg, consider moving the kill instruction above the
 /// current two-address instruction in order to eliminate the need for the
 /// copy.
-bool
-TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB,
-                                     MachineBasicBlock::iterator &mi,
-                                     MachineBasicBlock::iterator &nmi,
-                                     unsigned Reg) {
+bool TwoAddressInstructionPass::
+rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
+                      MachineBasicBlock::iterator &nmi,
+                      unsigned Reg) {
   // Bail immediately if we don't have LV available. We use it to find kills
   // efficiently.
   if (!LV)
@@ -918,7 +902,7 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB,
     if (MO.isUse()) {
       if (!MOReg)
         continue;
-      if (isDefTooClose(MOReg, DI->second, MI, MBB))
+      if (isDefTooClose(MOReg, DI->second, MI))
         return false;
       if (MOReg == Reg && !MO.isKill())
         return false;
@@ -1006,18 +990,16 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB,
   return true;
 }
 
-/// TryInstructionTransform - For the case where an instruction has a single
+/// tryInstructionTransform - For the case where an instruction has a single
 /// pair of tied register operands, attempt some transformations that may
 /// either eliminate the tied operands or improve the opportunities for
 /// coalescing away the register copy.  Returns true if no copy needs to be
 /// inserted to untie mi's operands (either because they were untied, or
 /// because mi was rescheduled, and will be visited again later).
 bool TwoAddressInstructionPass::
-TryInstructionTransform(MachineBasicBlock::iterator &mi,
+tryInstructionTransform(MachineBasicBlock::iterator &mi,
                         MachineBasicBlock::iterator &nmi,
-                        MachineFunction::iterator &mbbi,
-                        unsigned SrcIdx, unsigned DstIdx, unsigned Dist,
-                        SmallPtrSet<MachineInstr*, 8> &Processed) {
+                        unsigned SrcIdx, unsigned DstIdx, unsigned Dist) {
   if (OptLevel == CodeGenOpt::None)
     return false;
 
@@ -1030,7 +1012,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
   bool regBKilled = isKilled(MI, regB, MRI, TII);
 
   if (TargetRegisterInfo::isVirtualRegister(regA))
-    ScanUses(regA, &*mbbi, Processed);
+    scanUses(regA);
 
   // Check if it is profitable to commute the operands.
   unsigned SrcOp1, SrcOp2;
@@ -1051,7 +1033,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
         // If C dies but B does not, swap the B and C operands.
         // This makes the live ranges of A and C joinable.
         TryCommute = true;
-      else if (isProfitableToCommute(regA, regB, regC, &MI, mbbi, Dist)) {
+      else if (isProfitableToCommute(regA, regB, regC, &MI, Dist)) {
         TryCommute = true;
         AggressiveCommute = true;
       }
@@ -1059,7 +1041,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
   }
 
   // If it's profitable to commute, try to do so.
-  if (TryCommute && CommuteInstruction(mi, mbbi, regB, regC, Dist)) {
+  if (TryCommute && commuteInstruction(mi, regB, regC, Dist)) {
     ++NumCommuted;
     if (AggressiveCommute)
       ++NumAggrCommuted;
@@ -1068,7 +1050,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
 
   // If there is one more use of regB later in the same MBB, consider
   // re-schedule this MI below it.
-  if (RescheduleMIBelowKill(mbbi, mi, nmi, regB)) {
+  if (rescheduleMIBelowKill(mi, nmi, regB)) {
     ++NumReSchedDowns;
     return true;
   }
@@ -1078,7 +1060,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
     // three-address instruction.  Check if it is profitable.
     if (!regBKilled || isProfitableToConv3Addr(regA, regB)) {
       // Try to convert it.
-      if (ConvertInstTo3Addr(mi, nmi, mbbi, regA, regB, Dist)) {
+      if (convertInstTo3Addr(mi, nmi, regA, regB, Dist)) {
         ++NumConvertedTo3Addr;
         return true; // Done with this instruction.
       }
@@ -1087,7 +1069,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
 
   // If there is one more use of regB later in the same MBB, consider
   // re-schedule it before this MI if it's legal.
-  if (RescheduleKillAboveMI(mbbi, mi, nmi, regB)) {
+  if (rescheduleKillAboveMI(mi, nmi, regB)) {
     ++NumReSchedUps;
     return true;
   }
@@ -1131,8 +1113,8 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
 
         // Tentatively insert the instructions into the block so that they
         // look "normal" to the transformation logic.
-        mbbi->insert(mi, NewMIs[0]);
-        mbbi->insert(mi, NewMIs[1]);
+        MBB->insert(mi, NewMIs[0]);
+        MBB->insert(mi, NewMIs[1]);
 
         DEBUG(dbgs() << "2addr:    NEW LOAD: " << *NewMIs[0]
                      << "2addr:    NEW INST: " << *NewMIs[1]);
@@ -1142,8 +1124,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
         unsigned NewSrcIdx = NewMIs[1]->findRegisterUseOperandIdx(regB);
         MachineBasicBlock::iterator NewMI = NewMIs[1];
         bool TransformSuccess =
-          TryInstructionTransform(NewMI, mi, mbbi,
-                                  NewSrcIdx, NewDstIdx, Dist, Processed);
+          tryInstructionTransform(NewMI, mi, NewSrcIdx, NewDstIdx, Dist);
         if (TransformSuccess ||
             NewMIs[1]->getOperand(NewSrcIdx).isKill()) {
           // Success, or at least we made an improvement. Keep the unfolded
@@ -1202,8 +1183,7 @@ bool TwoAddressInstructionPass::
 collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) {
   const MCInstrDesc &MCID = MI->getDesc();
   bool AnyOps = false;
-  unsigned NumOps = MI->isInlineAsm() ?
-    MI->getNumOperands() : MCID.getNumOperands();
+  unsigned NumOps = MI->getNumOperands();
 
   for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) {
     unsigned DstIdx = 0;
@@ -1373,22 +1353,21 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
 
   DEBUG(dbgs() << "********** REWRITING TWO-ADDR INSTRS **********\n");
   DEBUG(dbgs() << "********** Function: "
-        << MF->getFunction()->getName() << '\n');
+        << MF->getName() << '\n');
 
   // This pass takes the function out of SSA form.
   MRI->leaveSSA();
 
   TiedOperandMap TiedOperands;
-
-  SmallPtrSet<MachineInstr*, 8> Processed;
-  for (MachineFunction::iterator mbbi = MF->begin(), mbbe = MF->end();
-       mbbi != mbbe; ++mbbi) {
+  for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
+       MBBI != MBBE; ++MBBI) {
+    MBB = MBBI;
     unsigned Dist = 0;
     DistanceMap.clear();
     SrcRegMap.clear();
     DstRegMap.clear();
     Processed.clear();
-    for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end();
+    for (MachineBasicBlock::iterator mi = MBB->begin(), me = MBB->end();
          mi != me; ) {
       MachineBasicBlock::iterator nmi = llvm::next(mi);
       if (mi->isDebugValue()) {
@@ -1402,7 +1381,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
 
       DistanceMap.insert(std::make_pair(mi, ++Dist));
 
-      ProcessCopy(&*mi, &*mbbi, Processed);
+      processCopy(&*mi);
 
       // First scan through all the tied register uses in this instruction
       // and record a list of pairs of tied operands for each register.
@@ -1427,8 +1406,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
           unsigned SrcReg = mi->getOperand(SrcIdx).getReg();
           unsigned DstReg = mi->getOperand(DstIdx).getReg();
           if (SrcReg != DstReg &&
-              TryInstructionTransform(mi, nmi, mbbi, SrcIdx, DstIdx, Dist,
-                                      Processed)) {
+              tryInstructionTransform(mi, nmi, SrcIdx, DstIdx, Dist)) {
             // The tied operands have been eliminated or shifted further down the
             // block to ease elimination. Continue processing with 'nmi'.
             TiedOperands.clear();
@@ -1468,7 +1446,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
 
   // Eliminate REG_SEQUENCE instructions. Their whole purpose was to preseve
   // SSA form. It's now safe to de-SSA.
-  MadeChange |= EliminateRegSequences();
+  MadeChange |= eliminateRegSequences();
 
   return MadeChange;
 }
@@ -1515,127 +1493,6 @@ static MachineInstr *findFirstDef(unsigned Reg, MachineRegisterInfo *MRI) {
   return First;
 }
 
-/// CoalesceExtSubRegs - If a number of sources of the REG_SEQUENCE are
-/// EXTRACT_SUBREG from the same register and to the same virtual register
-/// with different sub-register indices, attempt to combine the
-/// EXTRACT_SUBREGs and pre-coalesce them. e.g.
-/// %reg1026<def> = VLDMQ %reg1025<kill>, 260, pred:14, pred:%reg0
-/// %reg1029:6<def> = EXTRACT_SUBREG %reg1026, 6
-/// %reg1029:5<def> = EXTRACT_SUBREG %reg1026<kill>, 5
-/// Since D subregs 5, 6 can combine to a Q register, we can coalesce
-/// reg1026 to reg1029.
-void
-TwoAddressInstructionPass::CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs,
-                                              unsigned DstReg) {
-  SmallSet<unsigned, 4> Seen;
-  for (unsigned i = 0, e = Srcs.size(); i != e; ++i) {
-    unsigned SrcReg = Srcs[i];
-    if (!Seen.insert(SrcReg))
-      continue;
-
-    // Check that the instructions are all in the same basic block.
-    MachineInstr *SrcDefMI = MRI->getUniqueVRegDef(SrcReg);
-    MachineInstr *DstDefMI = MRI->getUniqueVRegDef(DstReg);
-    if (!SrcDefMI || !DstDefMI ||
-        SrcDefMI->getParent() != DstDefMI->getParent())
-      continue;
-
-    // If there are no other uses than copies which feed into
-    // the reg_sequence, then we might be able to coalesce them.
-    bool CanCoalesce = true;
-    SmallVector<unsigned, 4> SrcSubIndices, DstSubIndices;
-    for (MachineRegisterInfo::use_nodbg_iterator
-           UI = MRI->use_nodbg_begin(SrcReg),
-           UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
-      MachineInstr *UseMI = &*UI;
-      if (!UseMI->isCopy() || UseMI->getOperand(0).getReg() != DstReg) {
-        CanCoalesce = false;
-        break;
-      }
-      SrcSubIndices.push_back(UseMI->getOperand(1).getSubReg());
-      DstSubIndices.push_back(UseMI->getOperand(0).getSubReg());
-    }
-
-    if (!CanCoalesce || SrcSubIndices.size() < 2)
-      continue;
-
-    // Check that the source subregisters can be combined.
-    std::sort(SrcSubIndices.begin(), SrcSubIndices.end());
-    unsigned NewSrcSubIdx = 0;
-    if (!TRI->canCombineSubRegIndices(MRI->getRegClass(SrcReg), SrcSubIndices,
-                                      NewSrcSubIdx))
-      continue;
-
-    // Check that the destination subregisters can also be combined.
-    std::sort(DstSubIndices.begin(), DstSubIndices.end());
-    unsigned NewDstSubIdx = 0;
-    if (!TRI->canCombineSubRegIndices(MRI->getRegClass(DstReg), DstSubIndices,
-                                      NewDstSubIdx))
-      continue;
-
-    // If neither source nor destination can be combined to the full register,
-    // just give up.  This could be improved if it ever matters.
-    if (NewSrcSubIdx != 0 && NewDstSubIdx != 0)
-      continue;
-
-    // Now that we know that all the uses are extract_subregs and that those
-    // subregs can somehow be combined, scan all the extract_subregs again to
-    // make sure the subregs are in the right order and can be composed.
-    MachineInstr *SomeMI = 0;
-    CanCoalesce = true;
-    for (MachineRegisterInfo::use_nodbg_iterator
-           UI = MRI->use_nodbg_begin(SrcReg),
-           UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
-      MachineInstr *UseMI = &*UI;
-      assert(UseMI->isCopy());
-      unsigned DstSubIdx = UseMI->getOperand(0).getSubReg();
-      unsigned SrcSubIdx = UseMI->getOperand(1).getSubReg();
-      assert(DstSubIdx != 0 && "missing subreg from RegSequence elimination");
-      if ((NewDstSubIdx == 0 &&
-           TRI->composeSubRegIndices(NewSrcSubIdx, DstSubIdx) != SrcSubIdx) ||
-          (NewSrcSubIdx == 0 &&
-           TRI->composeSubRegIndices(NewDstSubIdx, SrcSubIdx) != DstSubIdx)) {
-        CanCoalesce = false;
-        break;
-      }
-      // Keep track of one of the uses.  Preferably the first one which has a
-      // <def,undef> flag.
-      if (!SomeMI || UseMI->getOperand(0).isUndef())
-        SomeMI = UseMI;
-    }
-    if (!CanCoalesce)
-      continue;
-
-    // Insert a copy to replace the original.
-    MachineInstr *CopyMI = BuildMI(*SomeMI->getParent(), SomeMI,
-                                   SomeMI->getDebugLoc(),
-                                   TII->get(TargetOpcode::COPY))
-      .addReg(DstReg, RegState::Define |
-                      getUndefRegState(SomeMI->getOperand(0).isUndef()),
-              NewDstSubIdx)
-      .addReg(SrcReg, 0, NewSrcSubIdx);
-
-    // Remove all the old extract instructions.
-    for (MachineRegisterInfo::use_nodbg_iterator
-           UI = MRI->use_nodbg_begin(SrcReg),
-           UE = MRI->use_nodbg_end(); UI != UE; ) {
-      MachineInstr *UseMI = &*UI;
-      ++UI;
-      if (UseMI == CopyMI)
-        continue;
-      assert(UseMI->isCopy());
-      // Move any kills to the new copy or extract instruction.
-      if (UseMI->getOperand(1).isKill()) {
-        CopyMI->getOperand(1).setIsKill();
-        if (LV)
-          // Update live variables
-          LV->replaceKillInstruction(SrcReg, UseMI, &*CopyMI);
-      }
-      UseMI->eraseFromParent();
-    }
-  }
-}
-
 static bool HasOtherRegSequenceUses(unsigned Reg, MachineInstr *RegSeq,
                                     MachineRegisterInfo *MRI) {
   for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
@@ -1647,7 +1504,7 @@ static bool HasOtherRegSequenceUses(unsigned Reg, MachineInstr *RegSeq,
   return false;
 }
 
-/// EliminateRegSequences - Eliminate REG_SEQUENCE instructions as part
+/// eliminateRegSequences - Eliminate REG_SEQUENCE instructions as part
 /// of the de-ssa process. This replaces sources of REG_SEQUENCE as
 /// sub-register references of the register defined by REG_SEQUENCE. e.g.
 ///
@@ -1655,7 +1512,7 @@ static bool HasOtherRegSequenceUses(unsigned Reg, MachineInstr *RegSeq,
 /// %reg1031<def> = REG_SEQUENCE %reg1029<kill>, 5, %reg1030<kill>, 6
 /// =>
 /// %reg1031:5<def>, %reg1031:6<def> = VLD1q16 %reg1024<kill>, ...
-bool TwoAddressInstructionPass::EliminateRegSequences() {
+bool TwoAddressInstructionPass::eliminateRegSequences() {
   if (RegSequences.empty())
     return false;
 
@@ -1759,10 +1616,6 @@ bool TwoAddressInstructionPass::EliminateRegSequences() {
         if (MO.isReg() && MO.isDef() && MO.getReg() == DstReg)
           MO.setIsUndef();
       }
-      // Make sure there is a full non-subreg imp-def operand on the
-      // instruction.  This shouldn't be necessary, but it seems that at least
-      // RAFast requires it.
-      Def->addRegisterDefined(DstReg, TRI);
       DEBUG(dbgs() << "First def: " << *Def);
     }
 
@@ -1775,12 +1628,6 @@ bool TwoAddressInstructionPass::EliminateRegSequences() {
       DEBUG(dbgs() << "Eliminated: " << *MI);
       MI->eraseFromParent();
     }
-
-    // Try coalescing some EXTRACT_SUBREG instructions. This can create
-    // INSERT_SUBREG instructions that must have <undef> flags added by
-    // LiveIntervalAnalysis, so only run it when LiveVariables is available.
-    if (LV)
-      CoalesceExtSubRegs(RealSrcs, DstReg);
   }
 
   RegSequences.clear();
diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
index 93840f0..bb93bdc 100644
--- a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -19,8 +19,8 @@
 #define DEBUG_TYPE "regalloc"
 #include "VirtRegMap.h"
 #include "LiveDebugVariables.h"
-#include "llvm/Function.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -127,9 +127,11 @@ void VirtRegMap::print(raw_ostream &OS, const Module*) const {
   OS << '\n';
 }
 
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void VirtRegMap::dump() const {
   print(dbgs());
 }
+#endif
 
 //===----------------------------------------------------------------------===//
 //                              VirtRegRewriter
@@ -170,6 +172,7 @@ INITIALIZE_PASS_BEGIN(VirtRegRewriter, "virtregrewriter",
 INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
 INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
 INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
+INITIALIZE_PASS_DEPENDENCY(LiveStacks)
 INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
 INITIALIZE_PASS_END(VirtRegRewriter, "virtregrewriter",
                     "Virtual Register Rewriter", false, false)
@@ -182,6 +185,8 @@ void VirtRegRewriter::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<SlotIndexes>();
   AU.addPreserved<SlotIndexes>();
   AU.addRequired<LiveDebugVariables>();
+  AU.addRequired<LiveStacks>();
+  AU.addPreserved<LiveStacks>();
   AU.addRequired<VirtRegMap>();
   MachineFunctionPass::getAnalysisUsage(AU);
 }
@@ -197,11 +202,11 @@ bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) {
   VRM = &getAnalysis<VirtRegMap>();
   DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n"
                << "********** Function: "
-               << MF->getFunction()->getName() << '\n');
+               << MF->getName() << '\n');
   DEBUG(VRM->dump());
 
   // Add kill flags while we still have virtual registers.
-  LIS->addKillFlags();
+  LIS->addKillFlags(VRM);
 
   // Live-in lists on basic blocks are required for physregs.
   addMBBLiveIns();
@@ -252,9 +257,6 @@ void VirtRegRewriter::rewrite() {
   SmallVector<unsigned, 8> SuperDeads;
   SmallVector<unsigned, 8> SuperDefs;
   SmallVector<unsigned, 8> SuperKills;
-#ifndef NDEBUG
-  BitVector Reserved = TRI->getReservedRegs(*MF);
-#endif
 
   for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
        MBBI != MBBE; ++MBBI) {
@@ -278,7 +280,7 @@ void VirtRegRewriter::rewrite() {
         unsigned PhysReg = VRM->getPhys(VirtReg);
         assert(PhysReg != VirtRegMap::NO_PHYS_REG &&
                "Instruction uses unmapped VirtReg");
-        assert(!Reserved.test(PhysReg) && "Reserved register assignment");
+        assert(!MRI->isReserved(PhysReg) && "Reserved register assignment");
 
         // Preserve semantics of sub-register operands.
         if (MO.getSubReg()) {
diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.h b/contrib/llvm/lib/CodeGen/VirtRegMap.h
index c320985..7974dda 100644
--- a/contrib/llvm/lib/CodeGen/VirtRegMap.h
+++ b/contrib/llvm/lib/CodeGen/VirtRegMap.h
@@ -63,8 +63,8 @@ namespace llvm {
     /// createSpillSlot - Allocate a spill slot for RC from MFI.
     unsigned createSpillSlot(const TargetRegisterClass *RC);
 
-    VirtRegMap(const VirtRegMap&);     // DO NOT IMPLEMENT
-    void operator=(const VirtRegMap&); // DO NOT IMPLEMENT
+    VirtRegMap(const VirtRegMap&) LLVM_DELETED_FUNCTION;
+    void operator=(const VirtRegMap&) LLVM_DELETED_FUNCTION;
 
   public:
     static char ID;